From 0da04e49a1385c6cf95ce4ed943a1d47d6eab4c3 Mon Sep 17 00:00:00 2001 From: cuihubin <530051970@qq.com> Date: Fri, 19 Jan 2024 11:01:00 +0800 Subject: [PATCH 001/112] batch create&reload db --- source/constructs/api/common/constant.py | 2 + source/constructs/api/common/enum.py | 6 +- source/constructs/api/data_source/crud.py | 38 ++- source/constructs/api/data_source/main.py | 16 +- source/constructs/api/data_source/service.py | 268 ++++++++++++++---- source/constructs/api/requirements.txt | 3 +- .../admin/database/whole/10_data_source.sql | 1 - source/portal/public/locales/en/common.json | 1 + .../portal/public/locales/en/datasource.json | 4 + source/portal/public/locales/zh/common.json | 1 + .../portal/public/locales/zh/datasource.json | 4 + source/portal/src/apis/data-source/api.ts | 8 +- .../src/pages/account-management/index.tsx | 9 +- .../src/pages/batch-operation/index.tsx | 84 ++++++ .../componments/DataSourceList.tsx | 22 +- source/portal/src/routers/routerEnum.tsx | 5 + 16 files changed, 410 insertions(+), 62 deletions(-) create mode 100644 source/portal/src/pages/batch-operation/index.tsx diff --git a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index 0a549ab0..c3574119 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -96,6 +96,8 @@ def __setattr__(self, name, value): const.PUBLIC = 'Public' const.PRIVATE = 'Private' const.ZERO = 0 +const.BATCH_CREATE_LIMIT = 100 +const.BATCH_SHEET = "OriginTemplate" const.UNSTRUCTURED_FILES = { "document": ["doc", "docx", "pdf", "ppt", "pptx", "xls", "xlsx", "odp"], diff --git a/source/constructs/api/common/enum.py b/source/constructs/api/common/enum.py index 1cfd76fa..cc75d722 100644 --- a/source/constructs/api/common/enum.py +++ b/source/constructs/api/common/enum.py @@ -123,8 +123,10 @@ class MessageEnum(Enum): SOURCE_JDBC_ALREADY_IMPORTED = {1255: "JDBC connection with the same instance already be imported"} SOURCE_JDBC_LIST_DATABASES_NOT_SUPPORTED = {1256: "JDBC list databases not supported."} SOURCE_JDBC_LIST_DATABASES_FAILED = {1257: "JDBC list databases failed."} - SOURCE_ACCOUNT_ID_ALREADY_EXISTS = {1256: "A duplicate account with the same name already exists. Please note that account names must be unique."} - + SOURCE_ACCOUNT_ID_ALREADY_EXISTS = {1258: "A duplicate account with the same name already exists. 
Please note that account names must be unique."} + SOURCE_BATCH_CREATE_FORMAT_ERR = {1259: "Invalid file type, please provide an Excel file (.xlsx)."} + SOURCE_BATCH_CREATE_LIMIT_ERR = {1260: "Batch operation limit exceeded, please ensure that a maximum of 100 data sources are created at a time."} + SOURCE_BATCH_SHEET_NOT_FOUND = {1261: "Sheet [OriginTemplate] not found in the Excel file"} # label LABEL_EXIST_FAILED = {1611: "Cannot create duplicated label"} diff --git a/source/constructs/api/data_source/crud.py b/source/constructs/api/data_source/crud.py index f99516ad..4eb220e1 100644 --- a/source/constructs/api/data_source/crud.py +++ b/source/constructs/api/data_source/crud.py @@ -866,7 +866,7 @@ def copy_properties(jdbc_instance_target: JDBCInstanceSource, jdbc_instance_orig jdbc_instance_target.region = jdbc_instance_origin.region # jdbc_instance_target.data_source_id = jdbc_instance_origin.data_source_id # jdbc_instance_target.detection_history_id = jdbc_instance_origin.detection_history_id - # jdbc_instance_target.glue_database = jdbc_instance_origin.glue_database + jdbc_instance_target.glue_database = jdbc_instance_origin.glue_database # jdbc_instance_target.glue_crawler = jdbc_instance_origin.glue_crawler jdbc_instance_target.glue_connection = jdbc_instance_origin.glue_connection # jdbc_instance_target.glue_vpc_endpoint = jdbc_instance_origin.glue_vpc_endpoint @@ -959,3 +959,39 @@ def get_total_glue_database_count(): def get_connected_glue_database_count(): list = list_glue_database_source_without_condition() return 0 if not list else list.filter(SourceGlueDatabase.glue_state == ConnectionState.ACTIVE.value).count() + +def get_schema_by_snapshot(provider_id, account_id, instance, region): + return get_session().query(JDBCInstanceSource.jdbc_connection_schema, JDBCInstanceSource.network_subnet_id) \ + .filter(JDBCInstanceSource.account_provider_id == provider_id) \ + .filter(JDBCInstanceSource.account_id == account_id) \ + .filter(JDBCInstanceSource.instance_id == instance) \ + .filter(JDBCInstanceSource.region == region).all() + +def get_connection_by_instance(provider_id, account_id, instance, region): + return get_session().query(JDBCInstanceSource.glue_connection) \ + .filter(JDBCInstanceSource.account_provider_id == provider_id) \ + .filter(JDBCInstanceSource.account_id == account_id) \ + .filter(JDBCInstanceSource.instance_id == instance) \ + .filter(JDBCInstanceSource.region == region).all() + +def get_crawler_glueDB_by_instance(provider_id, account_id, instance, region): + return get_session().query(JDBCInstanceSource.glue_crawler, JDBCInstanceSource.glue_database, JDBCInstanceSource.glue_connection) \ + .filter(JDBCInstanceSource.account_provider_id == provider_id) \ + .filter(JDBCInstanceSource.account_id == account_id) \ + .filter(JDBCInstanceSource.instance_id == instance) \ + .filter(JDBCInstanceSource.region == region).all() + +def get_enable_account_list(): + return get_session().query(Account.account_provider_id, Account.account_id, Account.region) \ + .filter(Account.status == SourceAccountStatus.ENABLE.value).all() + +def update_schema_by_account(provider_id, account_id, instance, region, schema): + session = get_session() + jdbc_instance_source = session.query(JDBCInstanceSource).filter(JDBCInstanceSource.account_provider_id == provider_id, + JDBCInstanceSource.region == region, + JDBCInstanceSource.account_id == account_id, + JDBCInstanceSource.instance_id == instance).first() + if not jdbc_instance_source: + jdbc_instance_source.jdbc_connection_schema = schema 
+ session.merge(jdbc_instance_source) + session.commit() diff --git a/source/constructs/api/data_source/main.py b/source/constructs/api/data_source/main.py index 135ec4d1..3049c469 100644 --- a/source/constructs/api/data_source/main.py +++ b/source/constructs/api/data_source/main.py @@ -1,4 +1,6 @@ -from fastapi import APIRouter +from io import BytesIO +from typing import List +from fastapi import APIRouter, File, UploadFile from fastapi_pagination import Page, Params from fastapi_pagination.ext.sqlalchemy import paginate @@ -190,8 +192,6 @@ def hide_glue_database(glueDatabase: schemas.SourceDeteteGlueDatabase): glueDatabase.name ) - - @router.post("/sync-glue-database", response_model=BaseResponse) @inject_session def sync_glue_database(glueDatabase: schemas.SourceGlueDatabaseBase): @@ -384,3 +384,13 @@ def query_connection_detail(account: schemas.JDBCInstanceSourceBase): @inject_session def list_jdbc_databases(source: schemas.JdbcSource): return service.list_jdbc_databases(source) + +@router.post("/batch-create", response_model=BaseResponse) +@inject_session +def batch_create(files: List[UploadFile] = File(...)): + return service.batch_create(files[0]) + +# @router.post("/snapshop", response_model=BaseResponse) +# @inject_session +# def get_schema_by_snapshot(provider_id: int, account_id: str, instance: str, region: str): +# return service.get_schema_by_snapshot(provider_id, account_id, instance, region) diff --git a/source/constructs/api/data_source/service.py b/source/constructs/api/data_source/service.py index 79de4a87..69397ed5 100644 --- a/source/constructs/api/data_source/service.py +++ b/source/constructs/api/data_source/service.py @@ -1,3 +1,6 @@ +import asyncio +from datetime import datetime +from io import BytesIO import json import os import re @@ -6,6 +9,9 @@ from time import sleep import boto3 +from fastapi import File, UploadFile +import openpyxl +import pandas as pd import pymysql from botocore.exceptions import ClientError @@ -22,8 +28,7 @@ from common.exception_handler import BizException from common.query_condition import QueryCondition from db.models_data_source import (Account, - JDBCInstanceSource, - SourceRegion) + JDBCInstanceSource) from discovery_job.service import can_delete_database as can_delete_job_database from discovery_job.service import delete_account as delete_job_by_account from discovery_job.service import delete_database as delete_job_database @@ -70,7 +75,6 @@ r'jdbc:sqlserver://[\w.-]+:\d+;database=([\w-]+)' ] - def build_s3_targets(bucket, credentials, region, is_init): s3 = boto3.client('s3', aws_access_key_id=credentials['AccessKeyId'], @@ -377,8 +381,7 @@ def sync_glue_database(account_id, region, glue_database_name): def sync_jdbc_connection(jdbc: JDBCInstanceSourceBase): - account_id = jdbc.account_id if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = jdbc.region if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_region + account_id, region = __get_admin_info(jdbc) ec2_client, credentials = __ec2(account=account_id, region=region) glue_client = __glue(account=account_id, region=region) lakeformation_client = __lakeformation(account=account_id, region=region) @@ -491,9 +494,7 @@ def condition_check(ec2_client, credentials, state, connection: dict): def sync(glue, lakeformation, credentials, crawler_role_arn, jdbc: JDBCInstanceSourceBase, url: str, schemas: str): jdbc_targets = [] - database_type = convert_provider_id_2_database_type(jdbc.account_provider_id) - glue_database_name = 
f"{const.SOLUTION_NAME}-{database_type}-{jdbc.instance_id}" - crawler_name = f"{const.SOLUTION_NAME}-{database_type}-{jdbc.instance_id}" + _, glue_database_name, crawler_name = __gen_resources_name(jdbc) state, glue_connection_name = crud.get_jdbc_connection_glue_info(jdbc.account_provider_id, jdbc.account_id, jdbc.region, jdbc.instance_id) if state == ConnectionState.CRAWLING.value: raise BizException(MessageEnum.SOURCE_CONNECTION_CRAWLING.get_code(), @@ -1155,12 +1156,6 @@ def before_delete_rds_connection(account: str, region: str, instance: str): if rds_instance is None: raise BizException(MessageEnum.SOURCE_RDS_NO_INSTANCE.get_code(), MessageEnum.SOURCE_RDS_NO_INSTANCE.get_msg()) - # if rds_instance.glue_crawler is None: - # raise BizException(MessageEnum.SOURCE_RDS_NO_CRAWLER.get_code(), - # MessageEnum.SOURCE_RDS_NO_CRAWLER.get_msg()) - # if rds_instance.glue_database is None: - # raise BizException(MessageEnum.SOURCE_RDS_NO_DATABASE.get_code(), - # MessageEnum.SOURCE_RDS_NO_DATABASE.get_msg()) # crawler, if crawling try to stop and raise, if pending raise directly state = crud.get_rds_instance_source_glue_state(account, region, instance) if state == ConnectionState.PENDING.value: @@ -1651,8 +1646,7 @@ def import_glue_database(glueDataBase: SourceGlueDatabaseBase): def update_jdbc_conn(jdbc_conn: JDBCInstanceSource): get_db_names(jdbc_conn.jdbc_connection_url, jdbc_conn.jdbc_connection_schema) - account_id = jdbc_conn.account_id if jdbc_conn.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = jdbc_conn.region if jdbc_conn.account_provider_id == Provider.AWS_CLOUD.value else admin_region + account_id, region = __get_admin_info(jdbc_conn) res: JDBCInstanceSourceFullInfo = crud.get_jdbc_instance_source_glue(jdbc_conn.account_provider_id, jdbc_conn.account_id, jdbc_conn.region, jdbc_conn.instance_id) check_connection(res, jdbc_conn, account_id, region) update_connection(res, jdbc_conn, account_id, region) @@ -1708,21 +1702,20 @@ def __validate_jdbc_url(url: str): if re.match(pattern, url): return True - def add_jdbc_conn(jdbcConn: JDBCInstanceSource): + jdbc_targets = [] get_db_names(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema) - - account_id = jdbcConn.account_id if jdbcConn.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = jdbcConn.region if jdbcConn.account_provider_id == Provider.AWS_CLOUD.value else admin_region + account_id, region = __get_admin_info(jdbcConn) + crawler_role_arn = __gen_role_arn(account_id=account_id, + region=region, + role_name='GlueDetectionJobRole') list = crud.list_jdbc_instance_source_by_instance_id_account(jdbcConn, account_id) if list: raise BizException(MessageEnum.SOURCE_JDBC_ALREADY_EXISTS.get_code(), MessageEnum.SOURCE_JDBC_ALREADY_EXISTS.get_msg()) - database_type = convert_provider_id_2_database_type(jdbcConn.account_provider_id) - glue_connection_name = f"{const.SOLUTION_NAME}-{database_type}-{jdbcConn.instance_id}" - # network_availability_zone by subnetId + glue_connection_name, glue_database_name, crawler_name = __gen_resources_name(jdbcConn) ec2_client, __ = __ec2(account=account_id, region=region) - # return availability_zone + glue = __get_glue_client(account=account_id, region=region) try: availability_zone = ec2_client.describe_subnets(SubnetIds=[jdbcConn.network_subnet_id])['Subnets'][0]['AvailabilityZone'] try: @@ -1758,6 +1751,32 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): if response['ResponseMetadata']['HTTPStatusCode'] != 200: raise 
BizException(MessageEnum.SOURCE_JDBC_CREATE_FAIL.get_code(), MessageEnum.SOURCE_JDBC_CREATE_FAIL.get_msg()) + + # Create Crawler + db_names = get_db_names(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema) + for db_name in db_names: + trimmed_db_name = db_name.strip() + if trimmed_db_name: + jdbc_targets.append({ + 'ConnectionName': glue_connection_name, + 'Path': f"{trimmed_db_name}/%" + }) + try: + response = glue.create_crawler( + Name=crawler_name, + Role=crawler_role_arn, + DatabaseName=glue_database_name, + Targets={ + 'JdbcTargets': jdbc_targets, + }, + Tags={ + const.TAG_KEY: const.TAG_VALUE, + const.TAG_ADMIN_ACCOUNT_ID: admin_account_id + }, + ) + except Exception: + logger.error(traceback.format_exc()) + jdbcConn.network_availability_zone = availability_zone jdbcConn.create_type = JDBCCreateType.ADD.value jdbc_conn_insert = JDBCInstanceSourceFullInfo() @@ -1772,7 +1791,6 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): jdbc_conn_insert.jdbc_enforce_ssl = jdbcConn.jdbc_enforce_ssl jdbc_conn_insert.kafka_ssl_enabled = jdbcConn.kafka_ssl_enabled jdbc_conn_insert.master_username = jdbcConn.master_username - # jdbc_conn_insert.password = jdbcConn.password jdbc_conn_insert.skip_custom_jdbc_cert_validation = jdbcConn.skip_custom_jdbc_cert_validation jdbc_conn_insert.custom_jdbc_cert = jdbcConn.custom_jdbc_cert jdbc_conn_insert.custom_jdbc_cert_string = jdbcConn.custom_jdbc_cert_string @@ -1784,8 +1802,9 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): jdbc_conn_insert.jdbc_driver_class_name = jdbcConn.jdbc_driver_class_name jdbc_conn_insert.jdbc_driver_jar_uri = jdbcConn.jdbc_driver_jar_uri jdbc_conn_insert.create_type = jdbcConn.create_type - # jdbc_conn_insert.connection_status = 'UNCONNECTED' jdbc_conn_insert.glue_connection = glue_connection_name + jdbc_conn_insert.glue_crawler = crawler_name + jdbc_conn_insert.glue_database = glue_database_name crud.add_jdbc_conn(jdbc_conn_insert) except ClientError as ce: logger.error(traceback.format_exc()) @@ -1826,7 +1845,7 @@ def gen_conn_properties(jdbcConn): def test_jdbc_conn(jdbc_conn_param: JDBCInstanceSourceBase): res = "FAIL" - account_id, region = gen_assume_info(jdbc_conn_param) + account_id, region = __get_admin_info(jdbc_conn_param) cursor = None connection = None # get connection name from sdp db @@ -1873,8 +1892,7 @@ def import_jdbc_conn(jdbc_conn: JDBCInstanceSourceBase): if crud.list_jdbc_connection_by_connection(jdbc_conn.instance_id): raise BizException(MessageEnum.SOURCE_JDBC_ALREADY_IMPORTED.get_code(), MessageEnum.SOURCE_JDBC_ALREADY_IMPORTED.get_msg()) - account_id = jdbc_conn.account_id if jdbc_conn.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = jdbc_conn.region if jdbc_conn.account_provider_id == Provider.AWS_CLOUD.value else admin_region + account_id, region = __get_admin_info() try: res_connection = __glue(account_id, region).get_connection(Name=jdbc_conn.instance_id)['Connection'] except ClientError as ce: @@ -2304,16 +2322,8 @@ def __delete_account(account_id: str, region: str): def query_glue_connections(account: AccountInfo): - res = [] - list = [] - account_id = account.account_id if account.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = account.region if account.account_provider_id == Provider.AWS_CLOUD.value else admin_region - - # list = __glue(account=account_id, region=region).get_connections(CatalogId=account_id, - # Filter={'ConnectionType': 'JDBC'}, - # MaxResults=100, - # HidePassword=True)['ConnectionList'] - + res, 
list = [] + account_id, region = __get_admin_info(account) next_token = "" while True: @@ -2342,7 +2352,7 @@ def query_jdbc_connections_sub_info(): return crud.query_jdbc_connections_sub_info() def list_buckets(account: AdminAccountInfo): - _, region = gen_assume_info(account) + _, region = __get_admin_info(account) iam_role_name = crud.get_iam_role(account.account_id) assumed_role = sts.assume_role(RoleArn=f"{iam_role_name}", RoleSessionName="glue-s3-connection") @@ -2359,10 +2369,9 @@ def query_glue_databases(account: AdminAccountInfo): return __glue(account=account.account_id, region=account.region).get_databases()['DatabaseList'] def query_account_network(account: AccountInfo): - accont_id = account.account_id if account.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = account.region if account.region == Provider.AWS_CLOUD.value else admin_region - logger.info(f'accont_id is:{accont_id},region is {region}') - ec2_client, __ = __ec2(account=accont_id, region=region) + account_id, region = __get_admin_info(account) + logger.info(f'accont_id is:{account_id},region is {region}') + ec2_client, __ = __ec2(account=account_id, region=region) vpcs = query_all_vpc(ec2_client) # vpcs = [vpc['VpcId'] for vpc in query_all_vpc(ec2_client)] vpc_list = [{"vpcId": vpc.get('VpcId'), "name": gen_resource_name(vpc)} for vpc in vpcs] @@ -2439,12 +2448,6 @@ def gen_resource_name(resource): else: return '-' -def gen_assume_info(account): - accont_id = account.account_id if account.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id - region = account.region if account.region == Provider.AWS_CLOUD.value else admin_region - return accont_id, region - - def test_glue_conn(account, connection): return boto3.client('glue').start_connection_test( CatalogId=account, @@ -2568,7 +2571,7 @@ def grant_lake_formation_permission(credentials, crawler_role_arn, glue_database def query_connection_detail(account: JDBCInstanceSourceBase): - account_id, region = gen_assume_info(account) + account_id, region = __get_admin_info(account) source: JDBCInstanceSourceFullInfo = crud.get_jdbc_instance_source_glue(provider_id=account.account_provider_id, account=account.account_id, region=account.region, @@ -2581,11 +2584,31 @@ def query_connection_detail(account: JDBCInstanceSourceBase): conn['ConnectionProperties']['JDBC_CONNECTION_SCHEMA'] = source.jdbc_connection_schema return conn +def __gen_resources_name(jdbc): + database_type = convert_provider_id_2_database_type(jdbc.account_provider_id) + glue_connection_name = f"{const.SOLUTION_NAME}-{database_type}-{jdbc.instance_id}" + glue_database_name = f"{const.SOLUTION_NAME}-{database_type}-{jdbc.instance_id}" + crawler_name = f"{const.SOLUTION_NAME}-{database_type}-{jdbc.instance_id}" + return glue_connection_name, glue_database_name, crawler_name def __get_excludes_file_exts(): extensions = list(set([ext for extensions_list in const.UNSTRUCTURED_FILES.values() for ext in extensions_list])) return ["*.{" + ",".join(extensions) + "}"] +def __get_glue_client(account, region): + iam_role_name = crud.get_iam_role(account) + assumed_role = sts.assume_role( + RoleArn=f"{iam_role_name}", + RoleSessionName="glue-connection" + ) + credentials = assumed_role['Credentials'] + glue = boto3.client('glue', + aws_access_key_id=credentials['AccessKeyId'], + aws_secret_access_key=credentials['SecretAccessKey'], + aws_session_token=credentials['SessionToken'], + region_name=region + ) + return glue def list_jdbc_databases(source: JdbcSource) -> 
list[str]: url_arr = source.connection_url.split(":") @@ -2607,3 +2630,146 @@ def list_jdbc_databases(source: JdbcSource) -> list[str]: databases = mysql_database.list_databases() logger.info(databases) return databases + + +def batch_create(file: UploadFile = File(...)): + time_str = time.time() + # batch_id=f"batch_create_jdbc_{time_str}" + jdbc_from_excel_set = set() + created_jdbc_list = [] + # Check if the file is an Excel file + if not file.filename.endswith('.xlsx'): + raise BizException(MessageEnum.SOURCE_BATCH_CREATE_FORMAT_ERR.get_code(), + MessageEnum.SOURCE_BATCH_CREATE_FORMAT_ERR.get_msg()) + # Read the Excel file + content = file.file.read() + workbook = openpyxl.load_workbook(BytesIO(content), read_only=False) + try: + sheet = workbook[const.BATCH_SHEET] + except KeyError: + raise BizException(MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_code(), + MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_msg()) + header = [cell for cell in sheet.iter_rows(min_row=2, max_row=2, values_only=True)][0] + max_column = sheet.max_column + sheet.insert_cols(max_column + 1, amount=2) + sheet.cell(row=2, column=max_column + 1, value="Result") + sheet.cell(row=2, column=max_column + 2, value="Details") + accounts = crud.get_enable_account_list() + accounts_list = [f"{account[0]}/{account[1]}/{account[2]}" for account in accounts] + for row_index, row in enumerate(sheet.iter_rows(min_row=3), start=2): + if all(cell.value is None for cell in row): + continue + if any(not cell.value for cell in [row[0], row[1], row[3], row[5], row[6], row[7], row[8], row[9]]): + __add_error_msg(sheet, max_column, row_index, "Fields cannot be empty") + elif sheet.cell(row=row_index + 1, column=2).value not in [0, 1]: + __add_error_msg(sheet, max_column, row_index, f"The value of {header[1]} must be 0 or 1") + elif not __validate_jdbc_url(str(row[3].value)): + __add_error_msg(sheet, max_column, row_index, f"The value of {header[3]} must be in the format jdbc:protocol://host:port") + elif f"{row[0].value}/{row[7].value}/{row[8].value}/{row[9].value}" in jdbc_from_excel_set: + __add_error_msg(sheet, max_column, row_index, f"The value of {header[0]}, {header[7]}, {header[8]}, {header[9]} already exist in the preceding rows") + elif f"{row[9].value}/{row[7].value}/{row[8].value}" not in accounts_list: + __add_error_msg(sheet, max_column, row_index, "The account is not existed!") + else: + jdbc_from_excel_set.add(f"{row[0].value}/{row[7].value}/{row[8].value}/{row[9].value}") + created_jdbc_list.append(__gen_created_jdbc(row)) + batch_create_jdbc(created_jdbc_list) + # TODO:write into excel + # TODO:upload to S3 + for row_num, row in enumerate(sheet.iter_rows(values_only=True, min_row=3)): + print(f"{row}") + return time_str + + +def __add_error_msg(sheet, max_column, row_index, msg): + sheet.cell(row=row_index + 1, column=max_column + 1, value="FAILED") + sheet.cell(row=row_index + 1, column=max_column + 2, value=msg) + # print(f"$$$$$$$$$ content is : {content}") + # df = pd.read_excel(BytesIO(content), engine='openpyxl') + # print(f"$$$$$$$$$ lines is : {df.shape[0]}") + # df = pd.read_excel(file) + # if df.shape[0] > const.BATCH_CREATE_LIMIT + 2: + # raise BizException(MessageEnum.SOURCE_BATCH_CREATE_LIMIT_ERR.get_code(), + # MessageEnum.SOURCE_BATCH_CREATE_LIMIT_ERR.get_msg()) + # print(f"$$$$$$$$${df.to_json(orient='records')}") + + # Further processing if needed + # jdbc_list = df.to_json(orient='records') + # asyncio.run(batch_create_jdbc(jdbc_list)) + + +def __gen_created_jdbc(row): + created_jdbc = 
JDBCInstanceSource() + # TODO + return created_jdbc + + +async def batch_create_jdbc(jdbc_list): + tasks = [add_jdbc_conn(jdbc) for jdbc in jdbc_list] + await asyncio.gather(*tasks) + + +def get_schema_by_snapshot(provider_id: int, account_id: str, instance: str, region: str): + res = crud.get_schema_by_snapshot(provider_id, account_id, instance, region) + return res[0][0].split('\n') if res else None, res[0][1] if res else None + +def get_schema_by_real_time(provider_id: int, account_id: str, instance: str, region: str, db_info: bool = False): + db, subnet_id = None, None + assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) + connection_rds = crud.get_connection_by_instance(provider_id, account_id, instance, region) + glue = __get_glue_client(assume_account, assume_region) + connection = glue.get_connection(Name=connection_rds[0][0]).get('Connection', {}) + if connection_rds[0] and connection_rds[0][0]: + subnet_id = connection.get('PhysicalConnectionRequirements', {}).get('SubnetId') + if db_info: + connection_properties = connection.get("ConnectionProperties", {}) + jdbc_source = JdbcSource(username=connection_properties.get("USERNAME"), + password=connection_properties.get("PASSWORD"), + secret_id=connection_properties.get("SECRET_ID"), + connection_url=connection_properties.get("JDBC_CONNECTION_URL") + ) + db = list_jdbc_databases(jdbc_source) + return db, subnet_id + +def sync_schema_by_job(provider_id: int, account_id: str, instance: str, region: str, schema: str): + jdbc_targets = [] + # Query Info + info = crud.get_crawler_glueDB_by_instance(provider_id, account_id, instance, region) + if not info: + return + crawler_role_arn = __gen_role_arn(account_id=account_id, + region=region, + role_name='GlueDetectionJobRole') + db_names = schema.split("\n") + for db_name in db_names: + trimmed_db_name = db_name.strip() + if trimmed_db_name: + jdbc_targets.append({ + 'ConnectionName': info[0][2], + 'Path': f"{trimmed_db_name}/%" + }) + # Update Crawler + assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) + try: + __get_glue_client(assume_account, assume_region).update_crawler( + Name=info[0], + Role=crawler_role_arn, + DatabaseName=info[1], + Targets={ + 'JdbcTargets': jdbc_targets, + }, + SchemaChangePolicy={ + 'UpdateBehavior': 'UPDATE_IN_DATABASE', + 'DeleteBehavior': 'DELETE_FROM_DATABASE' + } + ) + except Exception as e: + logger.error(traceback.format_exc()) + raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), + MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) + # Update RDS + crud.update_schema_by_account(provider_id, account_id, instance, region, schema) + +def __get_admin_info(jdbc): + account_id = jdbc.account_id if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id + region = jdbc.region if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_region + return account_id, region diff --git a/source/constructs/api/requirements.txt b/source/constructs/api/requirements.txt index bab6f583..d3193ce3 100644 --- a/source/constructs/api/requirements.txt +++ b/source/constructs/api/requirements.txt @@ -10,4 +10,5 @@ sqlakeyset==1.0.1659142803 requests==2.31.0 urllib3==1.26.18 python-jose==3.3.0 -pydantic==1.10.13 \ No newline at end of file +pydantic==1.10.13 +python_multipart==0.0.6 \ No newline at end of file diff --git 
a/source/constructs/lib/admin/database/whole/10_data_source.sql b/source/constructs/lib/admin/database/whole/10_data_source.sql index 4b4d9702..1c9ad124 100644 --- a/source/constructs/lib/admin/database/whole/10_data_source.sql +++ b/source/constructs/lib/admin/database/whole/10_data_source.sql @@ -288,4 +288,3 @@ create table source_s3_bucket create index detection_history_id on source_s3_bucket (detection_history_id); - diff --git a/source/portal/public/locales/en/common.json b/source/portal/public/locales/en/common.json index 4a94c17a..1703ee76 100644 --- a/source/portal/public/locales/en/common.json +++ b/source/portal/public/locales/en/common.json @@ -98,6 +98,7 @@ "savexlsxSensitiveOnly": "Download .xlsx file (Sensitive data only)", "savecsvSensitiveOnly": "Download .csv file (Sensitive data only)", "addDataSource": "Add data source", + "addDataSourceBatch": "Batch add data source", "deleteDataSource": "Delete data source", "deleteDataSourceOnly": "Delete data catalog only", "disconnectDeleteCatalog": "Disconnect & Delete catalog", diff --git a/source/portal/public/locales/en/datasource.json b/source/portal/public/locales/en/datasource.json index 97d8e30b..263a3f44 100644 --- a/source/portal/public/locales/en/datasource.json +++ b/source/portal/public/locales/en/datasource.json @@ -9,6 +9,10 @@ "filterInstances": "Filter instances", "connectToRDSDataSource": "Connect to RDS data source", "rdsInstances": "RDS instances", + "credential":"Credential", + "security":"Security group", + "chooseSg":"Choose security groups", + "emptySg":"No security groups", "connectionTips": "The connection may takes around 20-30 seconds.", "connectToDataSourceForAccount": "Connect to data source for account Id: ", "connectToDataSourceForAccountDesc": "You can create data catalogs by connecting data source. 
", diff --git a/source/portal/public/locales/zh/common.json b/source/portal/public/locales/zh/common.json index 6e187455..6e5c0ba9 100644 --- a/source/portal/public/locales/zh/common.json +++ b/source/portal/public/locales/zh/common.json @@ -98,6 +98,7 @@ "savexlsxSensitiveOnly": "下载 .xlsx 文件(只包含敏感数据)", "savecsvSensitiveOnly": "下载 .csv 文件(只包含敏感数据)", "addDataSource": "添加数据源", + "addDataSourceBatch": "批量添加数据源", "deleteDataSource": "删除数据源", "deleteDataSourceOnly": "仅删除数据目录", "disconnectDeleteCatalog": "断开连接并删除目录", diff --git a/source/portal/public/locales/zh/datasource.json b/source/portal/public/locales/zh/datasource.json index d2f04b06..20bb87ca 100644 --- a/source/portal/public/locales/zh/datasource.json +++ b/source/portal/public/locales/zh/datasource.json @@ -9,6 +9,10 @@ "filterInstances": "筛选实例", "connectToRDSDataSource": "连接到 RDS 数据源", "rdsInstances": "RDS 实例", + "credential":"认证方式", + "security":"安全组", + "chooseSg":"选择安全组", + "emptySg":"没有相关信息", "connectionTips": "连接可能需要大约 20-30 秒。", "connectToDataSourceForAccount": "连接到账户 ID 的数据源: ", "connectToDataSourceForAccountDesc": "您可以通过连接数据源来创建数据目录。 ", diff --git a/source/portal/src/apis/data-source/api.ts b/source/portal/src/apis/data-source/api.ts index 535231f2..eaa73ee8 100644 --- a/source/portal/src/apis/data-source/api.ts +++ b/source/portal/src/apis/data-source/api.ts @@ -201,6 +201,11 @@ const queryJdbcDatabases = async (params: any) => { return result; }; +const batchCreateDatasource = async (params: any) => { + const result = await apiRequest('post', 'data-source/batch-create', params.files); + return result; +}; + export { getDataSourceS3ByPage, getDataSourceRdsByPage, @@ -237,5 +242,6 @@ export { connectDataSourceGlue, deleteGlueDatabase, updateConnection, - queryJdbcDatabases + queryJdbcDatabases, + batchCreateDatasource }; diff --git a/source/portal/src/pages/account-management/index.tsx b/source/portal/src/pages/account-management/index.tsx index b82065ad..2d853629 100644 --- a/source/portal/src/pages/account-management/index.tsx +++ b/source/portal/src/pages/account-management/index.tsx @@ -5,6 +5,7 @@ import AccountList from './componments/AccountList'; import { getSourceCoverage } from 'apis/data-source/api'; import { AppLayout, + Button, ContentLayout, Grid, Header, @@ -20,11 +21,17 @@ import HelpInfo from 'common/HelpInfo'; import { buildDocLink } from 'ts/common'; import ProviderTab, { ProviderType } from 'common/ProviderTab'; import { CACHE_CONDITION_KEY } from 'enum/common_types'; +import { useNavigate } from 'react-router-dom'; const AccountManagementHeader: React.FC = () => { const { t } = useTranslation(); + const navigate = useNavigate() return ( -
+
navigate(RouterEnum.BatchOperation.path)}>Batch Operation} + > {t('account:connectToDataSource')}
); diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx new file mode 100644 index 00000000..46d344d0 --- /dev/null +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -0,0 +1,84 @@ +import { Button, FileUpload, Flashbar, FormField } from "@cloudscape-design/components"; +import React, { useState } from "react"; +import { batchCreateDatasource } from 'apis/data-source/api'; + +const BatchOperation = ()=>{ + const [value, setValue] = useState([] as any); + const [errors, setErrors] = useState([] as any); + const [disable, setDisable] = useState(true); + const [isLoading, setIsLoading] = useState(false); + const [items, setItems] = React.useState([ + { + type: "info", + dismissible: true, + dismissLabel: "Dismiss message", + onDismiss: () => setItems([]), + content: ( + <> + This is an info flash message. It contains{" "} + . + + ), + id: "message_1" + } + ] as any); + const [result, setResult] = useState("OK") + const changeFile=(file:any)=>{ + if(file[0].name.endsWith(".xlsx")===true){ + setErrors([]) + setDisable(false) + } else { + setErrors(["Uploaded file must have an xlsx extension."]) + } + setValue(file) + } + + const batchCreate = async () => { + setIsLoading(true); + const result: any = await batchCreateDatasource({files: value}); + setIsLoading(false) + if(result){ + setResult("OK") + } else { + setResult("NG") + } + }; + + return ( + <> + + changeFile(detail.value)} + value={value} + i18nStrings={{ + uploadButtonText: e => + e ? "Choose files" : "Choose file", + dropzoneText: e => + e + ? "Drop files to upload" + : "Drop file to upload", + removeFileAriaLabel: e => + `Remove file ${e + 1}`, + limitShowFewer: "Show fewer files", + limitShowMore: "Show more files", + errorIconAriaLabel: "Error" + }} + invalid + fileErrors={errors} + showFileLastModified + showFileSize + showFileThumbnail + tokenLimit={3} + constraintText="Hint text for file requirements" + /> + + + + + ); +} + +export default BatchOperation \ No newline at end of file diff --git a/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx b/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx index b6f0ccd9..6ccfd21d 100644 --- a/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx +++ b/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx @@ -16,6 +16,7 @@ import { ButtonDropdown, ButtonDropdownProps, StatusIndicator, + Multiselect, } from '@cloudscape-design/components'; import { DATA_TYPE_ENUM, TABLE_NAME } from 'enum/common_types'; import { @@ -126,6 +127,8 @@ const DataSourceList: React.FC = memo((props: any) => { const [showAddConnection, setShowAddConnection] = useState(false); const [showEditConnection, setShowEditConnection] = useState(false); + const [sgs, setSgs] = useState([] as any); + const [selectedSgs, setSelectedSgs] = useState([] as any); useEffect(() => { if (tagType === DATA_TYPE_ENUM.jdbc && !showAddConnection) { @@ -176,6 +179,11 @@ const DataSourceList: React.FC = memo((props: any) => { id: 'addDataSource', disabled: tagType !== DATA_TYPE_ENUM.jdbc, }, + // { + // text: t('button.addDataSourceBatch'), + // id: 'addDataSourceBatch', + // disabled: tagType !== DATA_TYPE_ENUM.jdbc, + // }, { text: t('button.deleteDataSource'), id: 'deleteDataSource', @@ -1115,7 +1123,19 @@ const DataSourceList: React.FC = memo((props: any) => {

- + {/* + + setSelectedSgs(detail.selectedOptions) + } + options={sgs} + empty={t('datasource:emptySg')||''} + placeholder={t('datasource:chooseSg')||''} + /> + */} + + setCedentialType(detail.value)} value={cedentialType} diff --git a/source/portal/src/routers/routerEnum.tsx b/source/portal/src/routers/routerEnum.tsx index 28f5610c..d2dee928 100644 --- a/source/portal/src/routers/routerEnum.tsx +++ b/source/portal/src/routers/routerEnum.tsx @@ -13,6 +13,7 @@ import CreateJobOld from 'pages/create-job/indexOld'; import GlueJob from 'pages/glue-job'; import LoginCallback from 'pages/login-callback'; import TimeLine from 'pages/time-line'; +import BatchOperation from 'pages/batch-operation' interface RouterEnumType { path: string; @@ -65,4 +66,8 @@ export const RouterEnum: Record = { path: '/time-line', element: , }, + BatchOperation: { + path: '/batch-operation', + element: , + } }; From 4a3f373f0f41b6857b343d18426f924b5f6e8252 Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Mon, 22 Jan 2024 16:43:36 +0800 Subject: [PATCH 002/112] chore: upload batch file --- .../public/files/BatchCreateConnections.xlsx | Bin 0 -> 10070 bytes source/portal/public/locales/en/common.json | 29 +- .../portal/public/locales/en/datasource.json | 21 +- source/portal/public/locales/zh/common.json | 29 +- .../portal/public/locales/zh/datasource.json | 21 +- source/portal/src/apis/data-source/api.ts | 138 ++++-- .../src/pages/account-management/index.tsx | 24 +- .../src/pages/batch-operation/index.tsx | 409 ++++++++++++++---- .../componments/DataSourceList.tsx | 10 +- .../portal/src/pages/left-menu/Navigation.tsx | 12 + .../src/pages/system-settings/index.tsx | 130 ++++++ source/portal/src/routers/routerEnum.tsx | 9 +- 12 files changed, 698 insertions(+), 134 deletions(-) create mode 100644 source/portal/public/files/BatchCreateConnections.xlsx create mode 100644 source/portal/src/pages/system-settings/index.tsx diff --git a/source/portal/public/files/BatchCreateConnections.xlsx b/source/portal/public/files/BatchCreateConnections.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5d5682e95dc8d8775a905665e03f46dc673be08c GIT binary patch literal 10070 zcmeHtg;QP0_VvYryIXLAyK{ly?h@SH-6gmOcL^HY-QC@SI|O$L&X1XS@0*#-`~HIW z`c`$swuR2u{Uhz17$PytY&8iF>~jz-pw zI!bP~Mh;qZu2z{xqswX-ZV+>eadIy#ZYX@F$Ah8_hLp-p~?QXErs4BpS%8A}5y$m{)0aK-9{ zoP*0<8!{?u$Q;8(Gf3N0sI%r~lpfkw+qB}~;lv&*7|z=Hazv_%l==!y_76#t?G6-; z%yYU1plb|4(QOE;s`k+Y|e~|Hk834wxZd{AIKDv zH~(jTw+P3VZYN-`z?Tq$PV9klJ8hf*D-WoY9dN9qEaWm5&gihH5F7 z$%&Y~uh(&@nzdBFOVWec1i77!4LJW~Mc4vEz3HMZ3_}5TN3}fThOimC8UusUKPh|f zCklaza(sAHasrEx>VVqB8#_poiN^8cj)Dw3IQJW|&~?*v5R3UZgA#&=X(xQB}=_v3xOxdw~Ni%YNX1p^rFt`4; z(*Fz@_QSz1$zTA$$XoV<`1ZtG$ow@}N|l$bx7m=q=qG)UuFK_S$6}F8^j0SULu|IK z;*W+5C`PC;LP>f0kX!tU^aFQ9c^J#%SRNg zD@D`1RL=lmS&Dnv;HyXOKNF~Y#PXks29oZh&Ts;&QMx{;L5P-apu;WmMF#P!`>W_f zt$mn!B4HnB<)}~-JiDD{9W=g5sO3_OG-DAeZq*M{QFH(mY|lR<7wQmq|K3RzG{831 zCqK^KU1%!(AVT|bkOYYJj(_KF2_=Bh0u3Eso@F-!6#xCxgW4GU!a=HkcO>U?iHV

ht#fx(N34tD%w`Y7oFhJxBito5&aIPJr<%D_-JJ8dR z#6<{mUPV{ibS(D{DQWp;*7@F7l({V~Jm(348f397ox)N#Wt{5|hn%er^t9O#|$A7<%hb&0w1qKPkZT490>;A1~H~;=3hgw6yOA#ux-<9;p^3aW?S{ z)c`M?zFoVJyzp61WOHp@_~)sWhswe74~yPKjG1L1K~FDngL|&WNf39bKhukWUlZVF7BF!7biQ zzF_~4ARbhzNVR?gF*+;&0RIPw9UR>(jU0a4??+WMqSD#Wyenp3b?+&?berI!^}vNv zGK9n8RT7-3Z{|MhmEHM8nDo>h`kXN`Fqg(!vBycH}XNarM|aflFXbe<8cj_W){fX3s1h@3_||#icZ`f`*^*IN(Ut6X$J=tNl)o zhnX`Y4FBR)P&$VP%OLT0IcS;$8Cwi%w4IdjRhOQw~)2#hrbemkshCK@Sd{U(re&* zC`j?otH3g}PdM1N_^RU#DB|E5Dw?R{5@V&6M1B`Nr0;)5u4T?czpHO3(Ht?*k1HU9 zYELSkIGD0Z*#v!$n3)7zz$AcaTPl7yAL$mz&33Ou+kV<<7;J_DcY^`3E=+`wngq6U zJ8lp{H*s`D9KD!0l?-(u=Iq(*qnp$jO=;8j=+|mBr`4|qjxjGVW0q8GuGeV$G?e!jH|U04@g{z%kReAyPg+5CjZ$H`o$sZ7nlW5g}l9 zy->``@`oQMBy6r*4s>l^rhy?Qc=TuIc#@BT)Vv=LWyatzXbEP_mr*D5F<*-2VRtEr z!sdWZMn1g|2nc1Q5~l8MM2*TFNw)S4`HJ)n+Q zo3s)~c)KRjzK2FV;nJO4n&H$MmQH->uZ3rAlS8a-8%1~Q2%P^}kU10^#TEoToC16} z+|SNOO9!8ML9}5l&7Ef8R_y}YpXzP1enkUKSuio~^Rzpw&3t;y`mf48};+)G=7=2Jd zF^!$Z9KP%15d&h~BwsNG@bLZDk};_B70R$kV(+y5^yNIfn21P@;S(f$Jc?RnHyL9D z;3Fv3KNVMzWmj$YR11Uz?kk+)6D?ipWioQV-}0FLNr8gmZF*1kyocF9k1?e}F0}hj zP0%{)m8nTe*Hd-M;qsu%e3}r-U)>{L2dq$Y;O6LAy`z4*0K}BVFX3DL0@qQ3b+jJ5 z+{0XU%cnEt5sbA-9j%%s{Nkv<`79-$${+%`<_%%k3Vupn&7R;4;{ubT`7AUYjh=bQ z6K%PMYYRCcar?#~y1+Vfm!>6j!|wersB3Wk{B5z66Y|mUCC3ViQmVx4yXq0AOuT^4 zpbDhoTSI+tvXfBNjo}oW8@|bVF5f z6>eSidxxazpQy9u*a_(~c|2au53Yhcczmw+?|XEro9d_}ts%)26;J1)bY7pIG1qiD zUd|6M*binuTy*z#Jl&5Zb-Z41-BfO^pwqW|U0xoKYC!Z6AR-Kn` z8k~`b>-i!)UV!jr^fCza#(qTJcP=~~83fhVAUy1Lsyyjl##8NwXa#myEH)EdG?9Ku zsSv4xSTTrj#&Y4>C{7(YKex@4T8+DE3T+9UjIlVSlPCrJ++)uAw56~ZS!whGJ`5%F z8%Ni)Vn>x=1b#`2V94$~57U-ZW$5)OQiXrW44I(4E|~&dt)PC$NVgGTz|!T|npH(l zp~W1Jry+^n+j}s0hZODhkC@WQA1iPQF|y*fha2uN+52>fl9$ty3b}_-UOgDl1W5ki z3ckVN?|^~ZMtFR$)yozuX}>?+6pqCl!{fXVDF<{|l8PPd^(IFk5{_~47svFBTI3eu z5yX?LW?X52uJ3*}CT@h-F;9*E5b|I&j$hRpkF8G(ZnOlfU#J@kW#;C(DoR*t z!BTXL=*RMC;B`=))MFmmYnu8*teb#;EKy25IIet?t_xxmTWs;2(mn}_+UVy+yj7)% zjh&w>21O|Hdfq*b(5i$M$8Nfkd&U<8=?zxUU4otP8OnKxeTLYU4)LQ%C^eHLcZ^d{ zEn8}Do0OerP)3Qm2G5>G8#vXtgsvOzK9-m6<%tATQX7 z8vW~J*`v&rYgFWnj7d{zb7~PRMC`pQrDmjHlEk-tb)K`eve_f=eI*yQq<0jHGgx|3 z#<=oMSn@gu0%+PM0oscxs#A`tQ*wt%+3F&8yD$t^=;*Gyw;RH3yTSZR{6WTe@p}g7 z1+^)y5#=!BDI)H1%ta|6a{Bblgi(%)i_La!=1ykCZNho9BoU4^kqpxeewdpy++&H4 zYW>YA!Lv-VP5Mo^_RNQoh{zVg#2*@c$;lNbd%3Oe>&&4Vs7P2W?os!#dgfiX~xM6zhjI^_K5GL)uHuCfM=P)%dw*V^cjg)eLM9d zH9?vkk|Xz^D^nMVS{W{7dt=1~j6IN3rzg){q0|h5L%XVkwB*MXSQT~B%Fk@8R=_AdNM8@62 z@Jj2PF{Fzx(B0ODqd;L_PQWIKgyLZucQBq!H+&^dEfQ+ImGcAbt2d=39%E)u2=Ajc6;)ZUFye7iT(F2I4-lVXzS;fFS-Rbr&?N}gkc(|0# zC}!Q}!&bZIqNm*%GGWGJ-Efwgm9u%EaUBl@c$-Y_wlzQ6h%$9 z(6_u83K{^w`h$LdJKQ>$8W}k{(EmRC&cW$%(oty)NI_=_4+J#pUAj=KS<%Y8b397L z4{pv8&?*ey>T8CImtNOE1gaGfgsDN^N38Ez2mHxzj zxi>iC(KW;`aZou>H@p*8ed2h%({rBwp({y*2P$C2tjRT>7oe&Q(^p!7E8mkOfZK=H zn#xlZiA-}QR7}hQg_4t8Y9}i;h808yGt{Skm5Wk$L?og#%KuUyABk@o7S%;*j4;r@ zttLq+d(a70;UivuL~f05csD*g+v-zBTA&7I*KcGYVcRr|oYWuxibQRXL?ngz?GWz*fhF2n6U zmQ44NzMZ&+^hsrb?GviZa-9p$L`ja#dMZPqm`&ki?@)%6iRPDkvc94A&=`Z3vmBHa z=yNDW^JRyEn{2H+a77R#tA;GHCB9F_3rs`k4{Yz@>a`GkSWO2?RSymuMzx^n&MS6u zbO(mLW(p<*P9-60pl6rZBvU&|19vC$H%@212OYsAp9@LU`{1%*bN@WJ6_a>o2=xz; zVb~Hqs>NxgyVQmMjTT^N6mace&0Ajbd5ewL*WWwOxK-5>@Z^Y8hS8tji7rNl z^0*zlD6xOE65@Z9SF;*O)7fry3hsPI4~@3ePeo8A)d)pwOR%3WSCp6}AnBpVt3d|l z28Id)2SEm*2xE+d4Dp#4OE8Nu#X*gI0OHZ`P6G<`r%D+ZJ3~fi!)gOQhW{G7T z%^Q#S!k?L9Pu4Rr+Q`@|(ie%$_G87hr{52^VnMaOzmnbMY(dpdFlO{SBHElXKRhDX zm-=0`9_e@ov1VU-kE}APv&Xx)JoR6bAQUMN)6IY+R8f5lV(A|uH~sl;RZwV^G~cYj zvfkCJ4C#^mjIn1Sg>*W@p6-0dYU({a*JA@AntN-CHNyhOGMQg${XtpZi~H{z&p)@hjipEAyrZ zp#1?~2U9(JBSS?;doyd3-%q!e%33x{#%S%opKi@tOo`@LghS+L`P0~>6_pZKWxlxK 
z0)3^QQ%jpHcmaZUO9CzVl4+Gw5k|CG&XS$cI|HM8bzXBFl?Ij+L`_x4R~P9HnOAsF z>ptlc_D4ebvBdP}4le#cQA7Dq$;&0sj?0rhmmh9l%>D2%W5c8{?%)OPA7hpqy5r69 zehfSM%9^Tkp+U(p@+*M0mE?s+WCn+&qOd$E&4aR(p%BTS2ACEZZfG;ynGuqSF-?R@ zhZSRtgKK6N{IVPngDG!$L5PvF&7iEbp@kKuf9X*VmayZ4aZP z^XjGR9457rN>0F}_0(H{g*K#-T@x_gEOQs))ns!kg8?Be)mTfj zX{>3jR-%XQcuQl0AXDZ(!a`L#>2j$&>ErK<98RY(;j%D8N8OMj@4-j2=ySS|Z^g03 z8n9p&JgWJ_@YlF>&K`SBClW^?vG;Q&(mYR*EgW3J|E1Q}a(U2Ty?AWKTA0p_O->5ei=bD{8ouIDwboV0B^K zJ!znthF)DPE~V-SOJub*Wf)4_KQ4u8r1U!XMo*&N|A*sNTF!wf#AxQo3RJtap23 z)}5m5zm=`b*PG-9J^&WSdXO!o1Q)Cx`fMNetoNx@T`;hz=QyKr<2Y;MCd_n_&*|1B z%LQm3fe0Q<7$qEgeF|-$N{Q%Z5X(x`&y8XT6&j$K(q^s8IAgk%pbc_O>9?YBWOxN5 zOcI&W29tiqpB)hl^XwvbvK7lPTI^W0creo^i`d(?Ii9jCrV{L*cvBmI(nB*DmHtfb z_@Fy$V!dWDsqtEvjmXCV^28VN^j2K{XZ6|RMh9N@jk-PF1PkOh!NSnSK+fLA)`8x@ z#@^^(Qv3gOinpyVB353qhXE~c9pY7R$Rn+GhEs6acQ_Ld#+jl$aK{<8UR*rK>2@p zj5}QCE1HYTAmaxQ6jvKggF8vLpy*FL%Q9xWMA+?9NK57PVbNG&c@!bHp z1Ds1|C3fikXEaYCwzF@CAbtjgS?_jfzT{&0c;F{5y>LxZqngmJwqIl**cDiILE1U` zsB_{h*GDd?5%lauU5q@u6jk6z%$@A*XRSCEW#D+)r>ZPOYg!Rum*1MkA zKP8{#AY$GSjroLJ`O#2_@9%mF06{O&zIrA-y%k9R8HKmdOo>TvQAqN}HAsJrLOomC z{{`V&^!=-)$4c8Q|0b}4-4ej9;&QIa!TFZvh;d(6=77XHn@E2%$gU<0DUCUvkE)-V zJ#b!%B%s&k%_%HL%%FE4+}AJcwU-YA^^2^7eUt#6wCi}HLc~JXT`+zm`|^29-RC8b zjgx$E2LM|Sr5c(@QI;pPEMQ{Wb<&wZ{$m`ez>VHuh6A=?s@)k$OgnKmApnfOFuPss zgCHYkx%-384A+t`;#Ze}-cI$^yqyfz-ileo@gE^o*;`$zm=yXNEBG|7WP40~=^B+4 zi&OGR661<>b^Sf5qWk@>;AHEthwn!#+av9;>kiH4{PUAx9Xs57lXC8ikls~FT$6L7 zhJZjW#z$1iimAOu+(T#p1jy7F&E+%88aKG;L26wFxgryA3=~}TA~Jukk|f7#vJ3NWJol3|!?ao4vntyYuCtO3 zO7+>Qy;bC2y%MP*kosN*?NPYS;=>-~;F@WzO<#ern-u%%qM~^D+yJuZFE}Dct{GjsDfo zucG9ip3dH6%3mbPUyXkiL;f_bh5Oz3Pif>=(_hQAf0{}o{m0_%uMU1aaQ^9F6!ZVR z_&<)Hzk2yK@BPzDIPM=`@M{+QtB1ey>Yw%ifYn>n{DWnGHUImD_*e5QqQ98`YkQQF Vgm_zB008Fg_xX*;J4t{0_kVwVzo-BJ literal 0 HcmV?d00001 diff --git a/source/portal/public/locales/en/common.json b/source/portal/public/locales/en/common.json index 1703ee76..b5d0ac16 100644 --- a/source/portal/public/locales/en/common.json +++ b/source/portal/public/locales/en/common.json @@ -20,7 +20,8 @@ "manageIdentifier": "Manage data identifiers", "doc": "Documentation", "version": "Version", - "dataDiscovery": "Data discovery" + "dataDiscovery": "Data discovery", + "systemSettings": "System settings" }, "breadcrumb": { "home": "Sensitive Data Protection Solution", @@ -104,7 +105,10 @@ "disconnectDeleteCatalog": "Disconnect & Delete catalog", "deleteDB": "Delete Database", "editDataSource": "Edit data source", - "download": "Download" + "download": "Download", + "batchOperation": "Batch Operation", + "estimateIP": "Estimate IP usage", + "upload": "Upload" }, "label": { "label": "Custom label", @@ -255,8 +259,8 @@ "dbCount": "Database count", "link": "Catalog Link", "propertyValue": { - "jobName" : "Job name", - "templateId" : "Template ID", + "jobName": "Job name", + "templateId": "Template ID", "schedule": "Schedule", "description": "Description", "range": "Range", @@ -424,5 +428,20 @@ "glue": "Glue data catalogs", "jdbc": "Custom database (JDBC)", "structureData": "Structured data", - "unstructuredData": "Unstructured data" + "unstructuredData": "Unstructured data", + "uploadSuccess": "Upload Success", + "settings": { + "title": "Settings", + "desc": "Overall settings for sensitive data discovery jobs", + "rdsDataSourceDiscovery": "RDS Data Source Data Discovery", + "rdsDetectedConcurrency": "Number of RDS instance detected concurrently in sensitive discovery job", + "rdsDetectedConcurrencyDesc": "How many RDS instances will scanned concurrently", + "rdsSubJobRunNumber": "Number of 
sub-job runs can be used for 1 RDS scan", + "rdsSubJobRunNumberDesc": "How many Glue job runs can be used for 1 RDS scan", + "subnet": "Subnet ", + "subnetNameDesc": "Total number of left IPs in subnet", + "currentIPLeft": "Current IP left", + "subnetDesc": "IP usage per subnet = (Concurrent number of RDS Instance * (3 + Concurrent number of sub-job runs)) / Count of subnets", + "estimateResult": "Based on the above settings, for each job run it will consume {{ipCount}} IPs maximum per subnet." + } } diff --git a/source/portal/public/locales/en/datasource.json b/source/portal/public/locales/en/datasource.json index 263a3f44..cb0fc6e6 100644 --- a/source/portal/public/locales/en/datasource.json +++ b/source/portal/public/locales/en/datasource.json @@ -9,10 +9,10 @@ "filterInstances": "Filter instances", "connectToRDSDataSource": "Connect to RDS data source", "rdsInstances": "RDS instances", - "credential":"Credential", - "security":"Security group", - "chooseSg":"Choose security groups", - "emptySg":"No security groups", + "credential": "Credential", + "security": "Security group", + "chooseSg": "Choose security groups", + "emptySg": "No security groups", "connectionTips": "The connection may takes around 20-30 seconds.", "connectToDataSourceForAccount": "Connect to data source for account Id: ", "connectToDataSourceForAccountDesc": "You can create data catalogs by connecting data source. ", @@ -91,5 +91,18 @@ "sg": "Security groups", "sgDesc": "Choose one or more security groups to allow access to the data store in your VPC subnet. Security groups are associated to the ENI attached to your subnet. You must choose at least one security group with a self-referencing inbound rule for all TCP ports.", "chooseSG": "Choose one or more security groups" + }, + "batch": { + "name": "Batch Operation", + "nameDesc": "Operate data source in batch", + "tab": "Batch data source creation", + "step1Title": "Step 1: Download Template", + "step1Desc": "Follow instruction in template to fill in information", + "step1Download": "Download template 'BatchCreateDataSourceTemplate.xlsx'", + "step2Title": "Step 2: Follow the instruction to fill in the template", + "step2Desc": "Fill the information in the template", + "step2Tips1": "Making sure no duplicates", + "step3Title": "Step 3: Upload the template with filled information", + "uploadTitle": "Fill in the template and upload" } } diff --git a/source/portal/public/locales/zh/common.json b/source/portal/public/locales/zh/common.json index 6e5c0ba9..eca3ad2b 100644 --- a/source/portal/public/locales/zh/common.json +++ b/source/portal/public/locales/zh/common.json @@ -20,7 +20,8 @@ "manageIdentifier": "管理数据识别规则", "doc": "文档", "version": "版本", - "dataDiscovery": "数据发现" + "dataDiscovery": "数据发现", + "systemSettings": "系统设置" }, "breadcrumb": { "home": "敏感数据保护解决方案", @@ -104,7 +105,10 @@ "disconnectDeleteCatalog": "断开连接并删除目录", "deleteDB": "删除数据库", "editDataSource": "编辑数据源", - "download": "下载" + "download": "下载", + "batchOperation": "批量操作", + "estimateIP": "评估 IP 使用情况", + "upload": "上传" }, "label": { "label": "标签", @@ -255,8 +259,8 @@ "dbCount": "数据库个数", "link": "目录链接", "propertyValue": { - "jobName" : "任务名", - "templateId" : "模版ID", + "jobName": "任务名", + "templateId": "模版ID", "schedule": "调度方式", "description": "描述", "range": "范围", @@ -424,5 +428,20 @@ "glue": "Glue 数据目录", "jdbc": "自定义数据库(JDBC)", "structureData": "结构化数据", - "unstructuredData": "非结构化数据" + "unstructuredData": "非结构化数据", + "uploadSuccess": "上传成功", + "settings": { + "title": "设置", + "desc": "敏感数据发现作业的总体设置", + 
"rdsDataSourceDiscovery": "RDS数据源数据发现", + "rdsDetectedConcurrency": "敏感发现作业中同时检测到的 RDS 实例数", + "rdsDetectedConcurrencyDesc": "将同时扫描多少个 RDS 实例", + "rdsSubJobRunNumber": "1 次 RDS 扫描可使用的子作业运行次数", + "rdsSubJobRunNumberDesc": "1 次 RDS 扫描可运行多少次 Glue 作业", + "subnet": "子网 ", + "subnetNameDesc": "子网中剩余IP总数", + "currentIPLeft": "当前剩余IP", + "subnetDesc": "每个子网的 IP 使用量 = (RDS 实例并发数量 * (3 + 子作业并发运行数量)) / 子网数量", + "estimateResult": "根据上述设置,对于每个作业运行,每个子网最多将消耗 {{ipCount}} 个 IP。" + } } diff --git a/source/portal/public/locales/zh/datasource.json b/source/portal/public/locales/zh/datasource.json index 20bb87ca..fe78a820 100644 --- a/source/portal/public/locales/zh/datasource.json +++ b/source/portal/public/locales/zh/datasource.json @@ -9,10 +9,10 @@ "filterInstances": "筛选实例", "connectToRDSDataSource": "连接到 RDS 数据源", "rdsInstances": "RDS 实例", - "credential":"认证方式", - "security":"安全组", - "chooseSg":"选择安全组", - "emptySg":"没有相关信息", + "credential": "认证方式", + "security": "安全组", + "chooseSg": "选择安全组", + "emptySg": "没有相关信息", "connectionTips": "连接可能需要大约 20-30 秒。", "connectToDataSourceForAccount": "连接到账户 ID 的数据源: ", "connectToDataSourceForAccountDesc": "您可以通过连接数据源来创建数据目录。 ", @@ -91,5 +91,18 @@ "sg": "安全组", "sgDesc": "选择一个或多个安全组以允许访问在你的 VPC 子网中的数据存储。安全组与你的子网关联的ENI相关联。你必须选择至少一个对所有 TCP 端口有自我引用入站规则的安全组。", "chooseSG": "选择一个或多个安全组" + }, + "batch": { + "name": "批量操作", + "nameDesc": "批量操作数据源", + "tab": "批量创建数据源", + "step1Title": "第 1 步:下载模板", + "step1Desc": "按照模板中的说明填写信息", + "step1Download": "下载模板'BatchCreateDataSourceTemplate.xlsx'", + "step2Title": "第 2 步:按照提示填写模板", + "step2Desc": "填写模板中的信息", + "step2Tips1": "确保没有重复项", + "step3Title": "第 3 步:上传填写信息的模板", + "uploadTitle": "填写模板并上传" } } diff --git a/source/portal/src/apis/data-source/api.ts b/source/portal/src/apis/data-source/api.ts index eaa73ee8..c2ec9812 100644 --- a/source/portal/src/apis/data-source/api.ts +++ b/source/portal/src/apis/data-source/api.ts @@ -46,13 +46,21 @@ const getDataSourceRdsByPage = async (params: any) => { // 分页获取DataSource Glue列表 const getDataSourceGlueByPage = async (params: any) => { - const result = await apiRequest('post', 'data-source/list-glue-database', params); + const result = await apiRequest( + 'post', + 'data-source/list-glue-database', + params + ); return result; }; // 分页获取DataSource JDBC列表 const getDataSourceJdbcByPage = async (params: any, provider_id: number) => { - const result = await apiRequest('post', `data-source/list-jdbc?provider_id=${provider_id}`, params); + const result = await apiRequest( + 'post', + `data-source/list-jdbc?provider_id=${provider_id}`, + params + ); return result; }; @@ -117,32 +125,56 @@ const hideDataSourceJDBC = async (params: any) => { }; const deleteDataCatalogS3 = async (params: any) => { - const result = await apiRequest('post', 'data-source/delete-catalog-s3', params); + const result = await apiRequest( + 'post', + 'data-source/delete-catalog-s3', + params + ); return result; }; const deleteDataCatalogRDS = async (params: any) => { - const result = await apiRequest('post', 'data-source/delete-catalog-rds', params); + const result = await apiRequest( + 'post', + 'data-source/delete-catalog-rds', + params + ); return result; }; const deleteDataCatalogJDBC = async (params: any) => { - const result = await apiRequest('post', 'data-source/delete-catalog-jdbc', params); + const result = await apiRequest( + 'post', + 'data-source/delete-catalog-jdbc', + params + ); return result; }; const disconnectAndDeleteS3 = async (params: any) => { - const result = await apiRequest('post', 
'data-source/disconnect-delete-catalog-s3', params); + const result = await apiRequest( + 'post', + 'data-source/disconnect-delete-catalog-s3', + params + ); return result; }; const disconnectAndDeleteRDS = async (params: any) => { - const result = await apiRequest('post', 'data-source/disconnect-delete-catalog-rds', params); + const result = await apiRequest( + 'post', + 'data-source/disconnect-delete-catalog-rds', + params + ); return result; }; const disconnectAndDeleteJDBC = async (params: any) => { - const result = await apiRequest('post', 'data-source/disconnect-delete-catalog-jdbc', params); + const result = await apiRequest( + 'post', + 'data-source/disconnect-delete-catalog-jdbc', + params + ); return result; }; @@ -152,57 +184,107 @@ const connectDataSourceJDBC = async (params: any) => { }; const connectDataSourceGlue = async (params: any) => { - const result = await apiRequest('post', 'data-source/sync-glue-database', params); + const result = await apiRequest( + 'post', + 'data-source/sync-glue-database', + params + ); return result; }; -const listGlueConnection = async (params: any) => { - const result = await apiRequest('post', 'data-source/query-glue-connections', params); +const listGlueConnection = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/query-glue-connections', + params + ); return result; }; -const importGlueConnection = async (params: any) => { - const result = await apiRequest('post', 'data-source/import-jdbc-conn', params); +const importGlueConnection = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/import-jdbc-conn', + params + ); return result; }; -const queryNetworkInfo = async (params: any) => { - const result = await apiRequest('post', 'data-source/query-account-network', params); +const queryNetworkInfo = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/query-account-network', + params + ); return result; }; -const queryBuckets = async (params: any) => { +const queryBuckets = async (params: any) => { const result = await apiRequest('post', 'data-source/list-buckets', params); return result; }; -const createConnection = async (params: any) => { +const createConnection = async (params: any) => { const result = await apiRequest('post', 'data-source/add-jdbc-conn', params); return result; }; -const updateConnection = async (params: any) => { - const result = await apiRequest('post', 'data-source/update-jdbc-conn', params); +const updateConnection = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/update-jdbc-conn', + params + ); return result; }; -const queryConnectionDetails = async (params: any) => { - const result = await apiRequest('post', 'data-source/query-connection-detail', params); +const queryConnectionDetails = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/query-connection-detail', + params + ); return result; }; -const deleteGlueDatabase = async (params: any) => { - const result = await apiRequest('post', 'data-source/delete-glue-database', params); +const deleteGlueDatabase = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/delete-glue-database', + params + ); return result; }; -const queryJdbcDatabases = async (params: any) => { +const queryJdbcDatabases = async (params: any) => { const result = await apiRequest('post', 'data-source/jdbc-databases', params); return result; }; -const batchCreateDatasource = async (params: any) 
=> { - const result = await apiRequest('post', 'data-source/batch-create', params.files); +const batchCreateDatasource = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/batch-create', + params.files + ); + return result; +}; + +const queryBatchStatus = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/query-batch-status?batch=' + params.batch, + {} + ); + return result; +}; + +const downloadBatchFiles = async (params: any) => { + const result = await apiRequest( + 'post', + 'data-source/download-batch-file?filename=' + params.filename, + {} + ); return result; }; @@ -243,5 +325,7 @@ export { deleteGlueDatabase, updateConnection, queryJdbcDatabases, - batchCreateDatasource + batchCreateDatasource, + queryBatchStatus, + downloadBatchFiles, }; diff --git a/source/portal/src/pages/account-management/index.tsx b/source/portal/src/pages/account-management/index.tsx index 2d853629..e73f9f7c 100644 --- a/source/portal/src/pages/account-management/index.tsx +++ b/source/portal/src/pages/account-management/index.tsx @@ -25,13 +25,17 @@ import { useNavigate } from 'react-router-dom'; const AccountManagementHeader: React.FC = () => { const { t } = useTranslation(); - const navigate = useNavigate() + const navigate = useNavigate(); return (

navigate(RouterEnum.BatchOperation.path)}>Batch Operation} - > + actions={ + + } + > {t('account:connectToDataSource')}
); @@ -58,16 +62,18 @@ const AccountManagementContent: React.FC = () => { const [loadingAccounts, setLoadingAccounts] = useState(true); useEffect(() => { - if (currentProvider) { getSourceCoverageData(currentProvider.id); } sessionStorage[CACHE_CONDITION_KEY] = JSON.stringify({ - column: "account_provider_id", - condition: "and", - operation: "in", - values: (currentProvider == null || currentProvider.id === 1)?[1, 4]:[currentProvider.id] - }) + column: 'account_provider_id', + condition: 'and', + operation: 'in', + values: + currentProvider == null || currentProvider.id === 1 + ? [1, 4] + : [currentProvider.id], + }); }, [currentProvider]); const getSourceCoverageData = async (providerId: number | string) => { diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index 46d344d0..731bcf79 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -1,84 +1,345 @@ -import { Button, FileUpload, Flashbar, FormField } from "@cloudscape-design/components"; -import React, { useState } from "react"; -import { batchCreateDatasource } from 'apis/data-source/api'; +import { + AppLayout, + Button, + Container, + ContentLayout, + FileUpload, + Flashbar, + FlashbarProps, + FormField, + Header, + Icon, + ProgressBar, + SpaceBetween, + StatusIndicator, + Tabs, +} from '@cloudscape-design/components'; +import React, { useEffect, useState } from 'react'; +import CustomBreadCrumb from 'pages/left-menu/CustomBreadCrumb'; +import Navigation from 'pages/left-menu/Navigation'; +import { RouterEnum } from 'routers/routerEnum'; +import { useTranslation } from 'react-i18next'; +import HelpInfo from 'common/HelpInfo'; +import { buildDocLink } from 'ts/common'; +import axios from 'axios'; +import { BASE_URL } from 'tools/apiRequest'; +import { downloadBatchFiles, queryBatchStatus } from 'apis/data-source/api'; -const BatchOperation = ()=>{ - const [value, setValue] = useState([] as any); - const [errors, setErrors] = useState([] as any); - const [disable, setDisable] = useState(true); - const [isLoading, setIsLoading] = useState(false); - const [items, setItems] = React.useState([ +enum BatchOperationStatus { + NotStarted = 'NotStarted', + Inprogress = 'Inprogress', + Completed = 'Completed', + Error = 'Error', +} +interface BatchOperationContentProps { + updateStatus: (status: BatchOperationStatus) => void; +} + +const AddAccountHeader: React.FC = () => { + const { t } = useTranslation(); + return ( +
+ {t('datasource:batch.name')} +
+ ); +}; +let statusInterval: any; + +const BatchOperationContent: React.FC = ( + props: BatchOperationContentProps +) => { + const { t } = useTranslation(); + const { updateStatus } = props; + const [uploadDisabled, setUploadDisabled] = useState(false); + const [files, setFiles] = useState([] as any); + const [errors, setErrors] = useState([] as any); + const [uploadProgress, setUploadProgress] = useState(0); + const [loadingUpload, setLoadingUpload] = useState(false); + + const queryStatus = async (fileId: string) => { + try { + const response: any = await queryBatchStatus({ + batch: fileId, + }); + const status = response.data; // 0: Inprogress, 1: Completed, 2: Error + if (status === 1 || status === 2) { + clearInterval(statusInterval); + } + if (status === 1) { + updateStatus(BatchOperationStatus.Completed); + } else if (status === 2) { + updateStatus(BatchOperationStatus.Error); + } else { + updateStatus(BatchOperationStatus.Inprogress); + } + } catch (error) { + console.error('查询状态失败:', error); + clearInterval(statusInterval); + } + }; + + const changeFile = (file: any) => { + setUploadProgress(0); + if (file[0].name.endsWith('.xlsx') === true) { + setErrors([]); + setUploadDisabled(false); + } else { + setErrors(['Uploaded file must have an xlsx extension.']); + setUploadDisabled(true); + } + setFiles(file); + }; + + const handleUpload = async () => { + const formData = new FormData(); + formData.append('files', files[0]); + setLoadingUpload(true); + try { + const response = await axios.post( + `${BASE_URL}data-source/batch-create`, + formData, { - type: "info", - dismissible: true, - dismissLabel: "Dismiss message", - onDismiss: () => setItems([]), - content: ( - <> - This is an info flash message. It contains{" "} - . - - ), - id: "message_1" + headers: { + 'Content-Type': 'multipart/form-data', + }, + onUploadProgress: (progressEvent: any) => { + const percentCompleted = Math.round( + (progressEvent.loaded * 100) / progressEvent.total + ); + console.info('percentCompleted:', percentCompleted); + setUploadProgress(percentCompleted); + setFiles([]); + }, } - ] as any); - const [result, setResult] = useState("OK") - const changeFile=(file:any)=>{ - if(file[0].name.endsWith(".xlsx")===true){ - setErrors([]) - setDisable(false) - } else { - setErrors(["Uploaded file must have an xlsx extension."]) - } - setValue(file) + ); + setLoadingUpload(false); + const fileId = response.data.data; + localStorage.setItem('batchFileId', fileId); + updateStatus(BatchOperationStatus.Inprogress); + statusInterval = setInterval(() => queryStatus(fileId), 5000); + console.log(response.data); + } catch (error) { + setLoadingUpload(false); + console.error(error); } + }; - const batchCreate = async () => { - setIsLoading(true); - const result: any = await batchCreateDatasource({files: value}); - setIsLoading(false) - if(result){ - setResult("OK") - } else { - setResult("NG") + useEffect(() => { + const fileId = localStorage.getItem('batchFileId'); + if (fileId) { + queryStatus(fileId); + statusInterval = setInterval(() => queryStatus(fileId), 5000); + } + return () => { + clearInterval(statusInterval); + }; + }, []); + + return ( + + + {t('datasource:batch.step1Title')} +
+ } + > +

+ + + {t('datasource:batch.step1Download')} + +

+ + + {t('datasource:batch.step2Title')} + + } + > + + + {t('datasource:batch.step3Title')} } - }; + > + + + { + changeFile(detail.value); + }} + value={files} + i18nStrings={{ + uploadButtonText: (e) => (e ? 'Choose files' : 'Choose file'), + dropzoneText: (e) => + e ? 'Drop files to upload' : 'Drop file to upload', + removeFileAriaLabel: (e) => `Remove file ${e + 1}`, + limitShowFewer: 'Show fewer files', + limitShowMore: 'Show more files', + errorIconAriaLabel: 'Error', + }} + invalid + fileErrors={errors} + accept=".xlsx" + showFileLastModified + showFileSize + showFileThumbnail + tokenLimit={1} + constraintText=".xlsx files only" + /> + + {uploadProgress > 0 && ( + + + {uploadProgress >= 100 && ( + {t('uploadSuccess')} + )} + + )} + + + + + ); +}; + +const BatchOperation: React.FC = () => { + const { t, i18n } = useTranslation(); + const breadcrumbItems = [ + { text: t('breadcrumb.home'), href: RouterEnum.Home.path }, + { + text: t('breadcrumb.dataSourceConnection'), + href: RouterEnum.DataSourceConnection.path, + }, + ]; + const [flashBar, setFlashBar] = useState( + [] + ); + + const [status, setStatus] = useState(BatchOperationStatus.NotStarted); + + const downloadReport = async () => { + console.log('download report'); + const fileName = localStorage.getItem('batchFileId'); + if (fileName) { + const response = await downloadBatchFiles({ + filename: fileName, + }); + console.info('response:', response); + // TODO: download file + } + }; + + useEffect(() => { + if (status === BatchOperationStatus.Completed) { + setFlashBar([ + { + header: 'Successfully create data sources', + type: 'success', + dismissible: true, + content: 'Please download the report and check the result.', + id: 'success', + action: ( + + ), + }, + ]); + } + if (status === BatchOperationStatus.Error) { + setFlashBar([ + { + header: 'Failed create data sources in batch', + type: 'error', + dismissible: true, + content: + 'Please download the report and fix the data to upload again to retry.', + id: 'error', + action: ( + + ), + }, + ]); + } + if (status === BatchOperationStatus.Inprogress) { + setFlashBar([ + { + loading: true, + header: 'In progress', + type: 'info', + dismissible: false, + content: + 'Creating databases, Please do not close this window. It will takes less than 15 minutes.', + id: 'info', + action: ( + + ), + }, + ]); + } + }, [status]); return ( - <> - - changeFile(detail.value)} - value={value} - i18nStrings={{ - uploadButtonText: e => - e ? "Choose files" : "Choose file", - dropzoneText: e => - e - ? 
"Drop files to upload" - : "Drop file to upload", - removeFileAriaLabel: e => - `Remove file ${e + 1}`, - limitShowFewer: "Show fewer files", - limitShowMore: "Show more files", - errorIconAriaLabel: "Error" - }} - invalid - fileErrors={errors} - showFileLastModified - showFileSize - showFileThumbnail - tokenLimit={3} - constraintText="Hint text for file requirements" - /> - - - - + } + tools={ + + } + content={ + }> + , + }, + ]} + /> + + } + headerSelector="#header" + breadcrumbs={} + navigation={} + navigationWidth={290} + /> ); -} +}; -export default BatchOperation \ No newline at end of file +export default BatchOperation; diff --git a/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx b/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx index 6ccfd21d..9bc6970c 100644 --- a/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx +++ b/source/portal/src/pages/data-source-connection/componments/DataSourceList.tsx @@ -109,10 +109,12 @@ const DataSourceList: React.FC = memo((props: any) => { const filterTableName = tagType === DATA_TYPE_ENUM.s3 ? TABLE_NAME.SOURCE_S3_BUCKET - : (tagType === DATA_TYPE_ENUM.rds ? (TABLE_NAME.SOURCE_RDS_INSTANCE) - : (tagType === DATA_TYPE_ENUM.glue ? TABLE_NAME.SOURCE_GLUE_DATABASE - : TABLE_NAME.SOURCE_JDBC_CONNECTION)) - // : TABLE_NAME.SOURCE_RDS_INSTANCE; + : tagType === DATA_TYPE_ENUM.rds + ? TABLE_NAME.SOURCE_RDS_INSTANCE + : tagType === DATA_TYPE_ENUM.glue + ? TABLE_NAME.SOURCE_GLUE_DATABASE + : TABLE_NAME.SOURCE_JDBC_CONNECTION; + // : TABLE_NAME.SOURCE_RDS_INSTANCE; const resFilterProps = { totalCount, columnList: columnList.filter((i) => i.filter), diff --git a/source/portal/src/pages/left-menu/Navigation.tsx b/source/portal/src/pages/left-menu/Navigation.tsx index af1f5125..3ec2ebad 100644 --- a/source/portal/src/pages/left-menu/Navigation.tsx +++ b/source/portal/src/pages/left-menu/Navigation.tsx @@ -57,7 +57,19 @@ const Navigation: React.FC = (props: INavigationProps) => { }, ], }, + { + type: 'section', + text: t('nav.systemSettings'), + items: [ + { + type: 'link', + text: t('nav.systemSettings'), + href: RouterEnum.SystemSettings.path, + }, + ], + }, { type: 'divider' }, + { type: 'link', text: t('nav.doc'), diff --git a/source/portal/src/pages/system-settings/index.tsx b/source/portal/src/pages/system-settings/index.tsx new file mode 100644 index 00000000..b6a10344 --- /dev/null +++ b/source/portal/src/pages/system-settings/index.tsx @@ -0,0 +1,130 @@ +import { + AppLayout, + Button, + Container, + ContentLayout, + Form, + FormField, + Header, + Input, + SpaceBetween, +} from '@cloudscape-design/components'; +import React from 'react'; +import CustomBreadCrumb from 'pages/left-menu/CustomBreadCrumb'; +import Navigation from 'pages/left-menu/Navigation'; +import { RouterEnum } from 'routers/routerEnum'; +import { useTranslation } from 'react-i18next'; +import { useNavigate } from 'react-router-dom'; + +const SettingsHeader: React.FC = () => { + const { t } = useTranslation(); + return ( +
+ {t('settings.title')} +
+ ); +}; + +const SystemSettingContent = () => { + const navigate = useNavigate(); + const { t } = useTranslation(); + + return ( +
+ + + + } + > + {t('settings.rdsDataSourceDiscovery')} + } + > + + + + + + + + + + +
+
+ + + +
+
+ + + +
+
+
+ + +
{t('settings.subnetDesc')}
+
+ +
+ {t('settings.estimateResult', { + ipCount: 100, + })} +
+
+
+
+
+
+ ); +}; + +const SystemSetting: React.FC = () => { + const { t } = useTranslation(); + const breadcrumbItems = [ + { text: t('breadcrumb.home'), href: RouterEnum.Home.path }, + { + text: t('breadcrumb.dataSourceConnection'), + href: RouterEnum.DataSourceConnection.path, + }, + ]; + return ( + }> + + + } + headerSelector="#header" + breadcrumbs={} + navigation={} + navigationWidth={290} + /> + ); +}; + +export default SystemSetting; diff --git a/source/portal/src/routers/routerEnum.tsx b/source/portal/src/routers/routerEnum.tsx index d2dee928..f9a3bd40 100644 --- a/source/portal/src/routers/routerEnum.tsx +++ b/source/portal/src/routers/routerEnum.tsx @@ -13,7 +13,8 @@ import CreateJobOld from 'pages/create-job/indexOld'; import GlueJob from 'pages/glue-job'; import LoginCallback from 'pages/login-callback'; import TimeLine from 'pages/time-line'; -import BatchOperation from 'pages/batch-operation' +import BatchOperation from 'pages/batch-operation'; +import SystemSetting from 'pages/system-settings'; interface RouterEnumType { path: string; @@ -69,5 +70,9 @@ export const RouterEnum: Record = { BatchOperation: { path: '/batch-operation', element: , - } + }, + SystemSettings: { + path: '/system-settings', + element: , + }, }; From c421411736fa4a4517b469acd1645394a0b44e12 Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Mon, 22 Jan 2024 17:13:51 +0800 Subject: [PATCH 003/112] fix: remove xlsx file --- .../public/files/BatchCreateConnections.xlsx | Bin 10070 -> 0 bytes .../portal/src/pages/batch-operation/index.tsx | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 source/portal/public/files/BatchCreateConnections.xlsx diff --git a/source/portal/public/files/BatchCreateConnections.xlsx b/source/portal/public/files/BatchCreateConnections.xlsx deleted file mode 100644 index 5d5682e95dc8d8775a905665e03f46dc673be08c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10070 zcmeHtg;QP0_VvYryIXLAyK{ly?h@SH-6gmOcL^HY-QC@SI|O$L&X1XS@0*#-`~HIW z`c`$swuR2u{Uhz17$PytY&8iF>~jz-pw zI!bP~Mh;qZu2z{xqswX-ZV+>eadIy#ZYX@F$Ah8_hLp-p~?QXErs4BpS%8A}5y$m{)0aK-9{ zoP*0<8!{?u$Q;8(Gf3N0sI%r~lpfkw+qB}~;lv&*7|z=Hazv_%l==!y_76#t?G6-; z%yYU1plb|4(QOE;s`k+Y|e~|Hk834wxZd{AIKDv zH~(jTw+P3VZYN-`z?Tq$PV9klJ8hf*D-WoY9dN9qEaWm5&gihH5F7 z$%&Y~uh(&@nzdBFOVWec1i77!4LJW~Mc4vEz3HMZ3_}5TN3}fThOimC8UusUKPh|f zCklaza(sAHasrEx>VVqB8#_poiN^8cj)Dw3IQJW|&~?*v5R3UZgA#&=X(xQB}=_v3xOxdw~Ni%YNX1p^rFt`4; z(*Fz@_QSz1$zTA$$XoV<`1ZtG$ow@}N|l$bx7m=q=qG)UuFK_S$6}F8^j0SULu|IK z;*W+5C`PC;LP>f0kX!tU^aFQ9c^J#%SRNg zD@D`1RL=lmS&Dnv;HyXOKNF~Y#PXks29oZh&Ts;&QMx{;L5P-apu;WmMF#P!`>W_f zt$mn!B4HnB<)}~-JiDD{9W=g5sO3_OG-DAeZq*M{QFH(mY|lR<7wQmq|K3RzG{831 zCqK^KU1%!(AVT|bkOYYJj(_KF2_=Bh0u3Eso@F-!6#xCxgW4GU!a=HkcO>U?iHV

z0)3^QQ%jpHcmaZUO9CzVl4+Gw5k|CG&XS$cI|HM8bzXBFl?Ij+L`_x4R~P9HnOAsF z>ptlc_D4ebvBdP}4le#cQA7Dq$;&0sj?0rhmmh9l%>D2%W5c8{?%)OPA7hpqy5r69 zehfSM%9^Tkp+U(p@+*M0mE?s+WCn+&qOd$E&4aR(p%BTS2ACEZZfG;ynGuqSF-?R@ zhZSRtgKK6N{IVPngDG!$L5PvF&7iEbp@kKuf9X*VmayZ4aZP z^XjGR9457rN>0F}_0(H{g*K#-T@x_gEOQs))ns!kg8?Be)mTfj zX{>3jR-%XQcuQl0AXDZ(!a`L#>2j$&>ErK<98RY(;j%D8N8OMj@4-j2=ySS|Z^g03 z8n9p&JgWJ_@YlF>&K`SBClW^?vG;Q&(mYR*EgW3J|E1Q}a(U2Ty?AWKTA0p_O->5ei=bD{8ouIDwboV0B^K zJ!znthF)DPE~V-SOJub*Wf)4_KQ4u8r1U!XMo*&N|A*sNTF!wf#AxQo3RJtap23 z)}5m5zm=`b*PG-9J^&WSdXO!o1Q)Cx`fMNetoNx@T`;hz=QyKr<2Y;MCd_n_&*|1B z%LQm3fe0Q<7$qEgeF|-$N{Q%Z5X(x`&y8XT6&j$K(q^s8IAgk%pbc_O>9?YBWOxN5 zOcI&W29tiqpB)hl^XwvbvK7lPTI^W0creo^i`d(?Ii9jCrV{L*cvBmI(nB*DmHtfb z_@Fy$V!dWDsqtEvjmXCV^28VN^j2K{XZ6|RMh9N@jk-PF1PkOh!NSnSK+fLA)`8x@ z#@^^(Qv3gOinpyVB353qhXE~c9pY7R$Rn+GhEs6acQ_Ld#+jl$aK{<8UR*rK>2@p zj5}QCE1HYTAmaxQ6jvKggF8vLpy*FL%Q9xWMA+?9NK57PVbNG&c@!bHp z1Ds1|C3fikXEaYCwzF@CAbtjgS?_jfzT{&0c;F{5y>LxZqngmJwqIl**cDiILE1U` zsB_{h*GDd?5%lauU5q@u6jk6z%$@A*XRSCEW#D+)r>ZPOYg!Rum*1MkA zKP8{#AY$GSjroLJ`O#2_@9%mF06{O&zIrA-y%k9R8HKmdOo>TvQAqN}HAsJrLOomC z{{`V&^!=-)$4c8Q|0b}4-4ej9;&QIa!TFZvh;d(6=77XHn@E2%$gU<0DUCUvkE)-V zJ#b!%B%s&k%_%HL%%FE4+}AJcwU-YA^^2^7eUt#6wCi}HLc~JXT`+zm`|^29-RC8b zjgx$E2LM|Sr5c(@QI;pPEMQ{Wb<&wZ{$m`ez>VHuh6A=?s@)k$OgnKmApnfOFuPss zgCHYkx%-384A+t`;#Ze}-cI$^yqyfz-ileo@gE^o*;`$zm=yXNEBG|7WP40~=^B+4 zi&OGR661<>b^Sf5qWk@>;AHEthwn!#+av9;>kiH4{PUAx9Xs57lXC8ikls~FT$6L7 zhJZjW#z$1iimAOu+(T#p1jy7F&E+%88aKG;L26wFxgryA3=~}TA~Jukk|f7#vJ3NWJol3|!?ao4vntyYuCtO3 zO7+>Qy;bC2y%MP*kosN*?NPYS;=>-~;F@WzO<#ern-u%%qM~^D+yJuZFE}Dct{GjsDfo zucG9ip3dH6%3mbPUyXkiL;f_bh5Oz3Pif>=(_hQAf0{}o{m0_%uMU1aaQ^9F6!ZVR z_&<)Hzk2yK@BPzDIPM=`@M{+QtB1ey>Yw%ifYn>n{DWnGHUImD_*e5QqQ98`YkQQF Vgm_zB008Fg_xX*;J4t{0_kVwVzo-BJ diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index 731bcf79..25a1c3f5 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -238,7 +238,7 @@ const BatchOperation: React.FC = () => { const fileName = localStorage.getItem('batchFileId'); if (fileName) { const response = await downloadBatchFiles({ - filename: fileName, + filename: 'batch_1705900337.8425026', }); console.info('response:', response); // TODO: download file From a4f70206912f62042fda33f8c4a5570891112ca5 Mon Sep 17 00:00:00 2001 From: junzhong Date: Tue, 23 Jan 2024 11:06:33 +0800 Subject: [PATCH 004/112] feat(be): Always update the schema of MySQL --- source/constructs/api/common/constant.py | 11 ++ source/constructs/api/common/enum.py | 1 + .../api/common/reference_parameter.py | 1 + source/constructs/api/config/crud.py | 4 + source/constructs/api/config/main.py | 31 +++++ source/constructs/api/config/schemas.py | 20 +++ source/constructs/api/config/service.py | 45 +++++- source/constructs/api/data_source/crud.py | 18 +-- .../constructs/api/data_source/jdbc_schema.py | 128 ++++++++++++++++++ source/constructs/api/data_source/main.py | 4 +- source/constructs/api/data_source/service.py | 88 +----------- source/constructs/api/discovery_job/crud.py | 19 ++- .../constructs/api/discovery_job/service.py | 122 +++++++++++++---- .../constructs/api/lambda/auto_sync_data.py | 14 -- source/constructs/api/lambda/check_run.py | 14 -- source/constructs/api/lambda/controller.py | 51 ++++++- .../constructs/api/lambda/receive_job_info.py | 23 ---- .../constructs/api/lambda/refresh_account.py | 15 -- .../api/lambda/sync_crawler_results.py | 21 --- source/constructs/api/main.py | 4 +- 
source/constructs/api/tools/list_tool.py | 4 + source/constructs/lib/admin-stack.ts | 1 + source/constructs/lib/admin/alb-stack.ts | 1 + source/constructs/lib/admin/api-stack.ts | 36 ++--- .../delete-resources/delete_resources.py | 6 +- source/constructs/lib/admin/rds-stack.ts | 5 +- source/constructs/lib/admin/vpc-stack.ts | 6 +- .../lib/agent/split-job/split_job.py | 2 +- 28 files changed, 449 insertions(+), 246 deletions(-) create mode 100644 source/constructs/api/config/main.py create mode 100644 source/constructs/api/config/schemas.py create mode 100644 source/constructs/api/data_source/jdbc_schema.py delete mode 100644 source/constructs/api/lambda/check_run.py delete mode 100644 source/constructs/api/lambda/receive_job_info.py delete mode 100644 source/constructs/api/lambda/refresh_account.py create mode 100644 source/constructs/api/tools/list_tool.py diff --git a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index c3574119..551d0329 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -98,6 +98,17 @@ def __setattr__(self, name, value): const.ZERO = 0 const.BATCH_CREATE_LIMIT = 100 const.BATCH_SHEET = "OriginTemplate" +const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER = 'ConcurrentRunInstanceNumber' +const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE = 50 +const.CONFIG_JOB_NUMBER_S3 = 'JobNumberS3' +const.CONFIG_JOB_NUMBER_S3_DEFAULT_VALUE = 10 +const.CONFIG_JOB_NUMBER_RDS = 'JobNumberRds' +const.CONFIG_JOB_NUMBER_RDS_DEFAULT_VALUE = 3 +const.CONTROLLER_ACTION = 'Action' +const.CONTROLLER_ACTION_SCHEDULE_JOB = 'ScheduleJob' +const.CONTROLLER_ACTION_CHECK_RUNNING_RUN_DATABASES = 'CheckRunningRunDatabases' +const.CONTROLLER_ACTION_CHECK_PENDING_RUN_DATABASES = 'CheckPendingRunDatabases' +const.CONTROLLER_ACTION_REFRESH_ACCOUNT = 'RefreshAccount' const.UNSTRUCTURED_FILES = { "document": ["doc", "docx", "pdf", "ppt", "pptx", "xls", "xlsx", "odp"], diff --git a/source/constructs/api/common/enum.py b/source/constructs/api/common/enum.py index cc75d722..80dbbcda 100644 --- a/source/constructs/api/common/enum.py +++ b/source/constructs/api/common/enum.py @@ -166,6 +166,7 @@ class RunState(Enum): @unique class RunDatabaseState(Enum): READY = "Ready" + PENDING = "Pending" RUNNING = "Running" SUCCEEDED = "Succeeded" FAILED = "Failed" diff --git a/source/constructs/api/common/reference_parameter.py b/source/constructs/api/common/reference_parameter.py index 94758dd8..e11da107 100644 --- a/source/constructs/api/common/reference_parameter.py +++ b/source/constructs/api/common/reference_parameter.py @@ -11,3 +11,4 @@ partition = caller_identity['Arn'].split(':')[1] url_suffix = const.URL_SUFFIX_CN if partition == const.PARTITION_CN else '' public_account_id = const.PUBLIC_ACCOUNT_ID_CN if partition == const.PARTITION_CN else const.PUBLIC_ACCOUNT_ID_GLOBAL +admin_subnet_ids = os.getenv('SubnetIds', '').split(',') \ No newline at end of file diff --git a/source/constructs/api/config/crud.py b/source/constructs/api/config/crud.py index a3872926..09873de7 100644 --- a/source/constructs/api/config/crud.py +++ b/source/constructs/api/config/crud.py @@ -17,3 +17,7 @@ def set_value(key: str, value: str): config_value=value) session.add(db_config) session.commit() + + +def list_config(): + return get_session().query(models.Config).all() diff --git a/source/constructs/api/config/main.py b/source/constructs/api/config/main.py new file mode 100644 index 00000000..148229cb --- /dev/null +++ 
b/source/constructs/api/config/main.py @@ -0,0 +1,31 @@ +from fastapi import APIRouter +from . import service, schemas +from common.request_wrapper import inject_session +from common.response_wrapper import BaseResponse +from discovery_job import service as discovery_job_service + +router = APIRouter(prefix="/config", tags=["config"]) + + +@router.get("", response_model=BaseResponse[list[schemas.ConfigBase]]) +@inject_session +def list_config(): + return service.list_config() + + +@router.post("") +@inject_session +def set_config(configs: list[schemas.ConfigBase]): + return service.set_configs(configs) + + +@router.get("/subnets", response_model=BaseResponse[list[schemas.SubnetInfo]]) +@inject_session +def list_subnets(): + return service.list_subnets() + + +@router.get("/run-database-ip-count", response_model=BaseResponse[int]) +@inject_session +def get_run_database_ip_count(database_type: str): + return discovery_job_service.get_run_database_ip_count(database_type) diff --git a/source/constructs/api/config/schemas.py b/source/constructs/api/config/schemas.py new file mode 100644 index 00000000..ce62f85e --- /dev/null +++ b/source/constructs/api/config/schemas.py @@ -0,0 +1,20 @@ +from typing import Optional +from pydantic import BaseModel +import db.models_config as models + + +class ConfigBase(BaseModel): + config_key: str + config_value: str + + class Meta: + orm_model = models.Config + + class Config: + orm_mode = True + + +class SubnetInfo(BaseModel): + subnet_id: str + name: Optional[str] + available_ip_address_count: int diff --git a/source/constructs/api/config/service.py b/source/constructs/api/config/service.py index 563247c9..4ab32bf0 100644 --- a/source/constructs/api/config/service.py +++ b/source/constructs/api/config/service.py @@ -1,9 +1,46 @@ -from . import crud +from . 
import crud,schemas +import boto3 +from common.reference_parameter import admin_subnet_ids def set_config(key: str, value: str): - return crud.set_value(key, value) + crud.set_value(key, value) + + +def get_config(key: str, default_value=None) -> str: + _value = crud.get_value(key) + if _value: + return _value + if default_value: + return default_value + return None + + +def list_config(): + return crud.list_config() + + +def set_configs(configs: list[schemas.ConfigBase]): + for config in configs: + set_config(config.config_key, config.config_value) + + +def list_subnets(): + ec2_client = boto3.client('ec2') + response = ec2_client.describe_subnets(SubnetIds=admin_subnet_ids) + subnet_infos = [] + for subnet in response['Subnets']: + subnet_info = schemas.SubnetInfo(subnet_id=subnet['SubnetId'], + name=__get_name(subnet['Tags']), + available_ip_address_count=subnet['AvailableIpAddressCount']) + subnet_infos.append(subnet_info) + return subnet_infos + + +def __get_name(tags: list) -> str: + for tag in tags: + if tag.get("Key") == "Name": + return tag.get("Value") + return None -def get_config(key: str) -> str: - return crud.get_value(key) diff --git a/source/constructs/api/data_source/crud.py b/source/constructs/api/data_source/crud.py index 4eb220e1..5b97b320 100644 --- a/source/constructs/api/data_source/crud.py +++ b/source/constructs/api/data_source/crud.py @@ -960,26 +960,29 @@ def get_connected_glue_database_count(): list = list_glue_database_source_without_condition() return 0 if not list else list.filter(SourceGlueDatabase.glue_state == ConnectionState.ACTIVE.value).count() -def get_schema_by_snapshot(provider_id, account_id, instance, region): + +def get_schema_by_snapshot(provider_id, account_id, region, instance): return get_session().query(JDBCInstanceSource.jdbc_connection_schema, JDBCInstanceSource.network_subnet_id) \ .filter(JDBCInstanceSource.account_provider_id == provider_id) \ .filter(JDBCInstanceSource.account_id == account_id) \ .filter(JDBCInstanceSource.instance_id == instance) \ - .filter(JDBCInstanceSource.region == region).all() + .filter(JDBCInstanceSource.region == region).first() + -def get_connection_by_instance(provider_id, account_id, instance, region): +def get_connection_by_instance(provider_id, account_id, region, instance): return get_session().query(JDBCInstanceSource.glue_connection) \ .filter(JDBCInstanceSource.account_provider_id == provider_id) \ .filter(JDBCInstanceSource.account_id == account_id) \ .filter(JDBCInstanceSource.instance_id == instance) \ - .filter(JDBCInstanceSource.region == region).all() + .filter(JDBCInstanceSource.region == region).first() -def get_crawler_glueDB_by_instance(provider_id, account_id, instance, region): + +def get_crawler_glue_db_by_instance(provider_id, account_id, region, instance): return get_session().query(JDBCInstanceSource.glue_crawler, JDBCInstanceSource.glue_database, JDBCInstanceSource.glue_connection) \ .filter(JDBCInstanceSource.account_provider_id == provider_id) \ .filter(JDBCInstanceSource.account_id == account_id) \ .filter(JDBCInstanceSource.instance_id == instance) \ - .filter(JDBCInstanceSource.region == region).all() + .filter(JDBCInstanceSource.region == region).first() def get_enable_account_list(): return get_session().query(Account.account_provider_id, Account.account_id, Account.region) \ @@ -991,7 +994,6 @@ def update_schema_by_account(provider_id, account_id, instance, region, schema): JDBCInstanceSource.region == region, JDBCInstanceSource.account_id == account_id, 
JDBCInstanceSource.instance_id == instance).first() - if not jdbc_instance_source: + if jdbc_instance_source: jdbc_instance_source.jdbc_connection_schema = schema - session.merge(jdbc_instance_source) session.commit() diff --git a/source/constructs/api/data_source/jdbc_schema.py b/source/constructs/api/data_source/jdbc_schema.py new file mode 100644 index 00000000..8a203542 --- /dev/null +++ b/source/constructs/api/data_source/jdbc_schema.py @@ -0,0 +1,128 @@ +import boto3 +import json +import traceback +from common.exception_handler import BizException +from common.enum import MessageEnum, Provider +from common.constant import const +from common.reference_parameter import logger, admin_account_id, admin_region, partition +from . import jdbc_database, crud +from .schemas import JdbcSource, JDBCInstanceSourceBase + +sts = boto3.client('sts') + + +def list_jdbc_databases(source: JdbcSource) -> list[str]: + url_arr = source.connection_url.split(":") + if len(url_arr) != 4: + raise BizException(MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_code(), MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_msg()) + if url_arr[1] != "mysql": + raise BizException(MessageEnum.SOURCE_JDBC_LIST_DATABASES_NOT_SUPPORTED.get_code(), MessageEnum.SOURCE_JDBC_LIST_DATABASES_NOT_SUPPORTED.get_msg()) + host = url_arr[2][2:] + port = int(url_arr[3].split("/")[0]) + user = source.username + password = source.password + if source.secret_id: + secrets_client = boto3.client('secretsmanager') + secret_response = secrets_client.get_secret_value(SecretId=source.secret_id) + secrets = json.loads(secret_response['SecretString']) + user = secrets['username'] + password = secrets['password'] + mysql_database = jdbc_database.MySQLDatabase(host, port, user, password) + databases = mysql_database.list_databases() + logger.info(databases) + return databases + + +def get_schema_by_snapshot(provider_id: int, account_id: str, region: str, instance: str): + res = crud.get_schema_by_snapshot(provider_id, account_id, region, instance) + return res[0].split('\n') if res else None, res[1] if res else None + + +def get_schema_by_real_time(provider_id: int, account_id: str, region: str, instance: str, db_info: bool = False): + db, subnet_id = None, None + assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) + connection_rds = crud.get_connection_by_instance(provider_id, account_id, region, instance) + glue = __get_glue_client(assume_account, assume_region) + connection = glue.get_connection(Name=connection_rds[0]).get('Connection', {}) + subnet_id = connection.get('PhysicalConnectionRequirements', {}).get('SubnetId') + if db_info: + connection_properties = connection.get("ConnectionProperties", {}) + jdbc_source = JdbcSource(username=connection_properties.get("USERNAME"), + password=connection_properties.get("PASSWORD"), + secret_id=connection_properties.get("SECRET_ID"), + connection_url=connection_properties.get("JDBC_CONNECTION_URL") + ) + try: + db = list_jdbc_databases(jdbc_source) + except Exception as e: + logger.info(e) + return db, subnet_id + + +def sync_schema_by_job(provider_id: int, account_id: str, region: str, instance: str, schemas: list): + jdbc_targets = [] + # Query Info + info = crud.get_crawler_glue_db_by_instance(provider_id, account_id, region, instance) + logger.info(f"info:{info}") + if not info: + return + for db_name in schemas: + trimmed_db_name = db_name.strip() + if trimmed_db_name: + jdbc_targets.append({ 
+ 'ConnectionName': info[2], + 'Path': f"{trimmed_db_name}/%" + }) + # Update Crawler + assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) + crawler_role_arn = __gen_role_arn(account_id=assume_account, + region=assume_region, + role_name='GlueDetectionJobRole') + try: + logger.info(f"assume_account:{assume_account}") + logger.info(f"assume_region:{assume_region}") + __get_glue_client(assume_account, assume_region).update_crawler( + Name=info[0], + Role=crawler_role_arn, + DatabaseName=info[1], + Targets={ + 'JdbcTargets': jdbc_targets, + }, + SchemaChangePolicy={ + 'UpdateBehavior': 'UPDATE_IN_DATABASE', + 'DeleteBehavior': 'DELETE_FROM_DATABASE' + } + ) + except Exception as e: + logger.error(traceback.format_exc()) + raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), + MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) + # Update RDS + crud.update_schema_by_account(provider_id, account_id, instance, region, "\n".join(schemas)) + + +def __get_admin_info(jdbc): + account_id = jdbc.account_id if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id + region = jdbc.region if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_region + return account_id, region + + +def __get_glue_client(account, region): + iam_role_name = crud.get_iam_role(account) + logger.info(f"iam_role_name:{iam_role_name}") + assumed_role = sts.assume_role( + RoleArn=f"{iam_role_name}", + RoleSessionName="glue-connection" + ) + credentials = assumed_role['Credentials'] + glue = boto3.client('glue', + aws_access_key_id=credentials['AccessKeyId'], + aws_secret_access_key=credentials['SecretAccessKey'], + aws_session_token=credentials['SessionToken'], + region_name=region + ) + return glue + + +def __gen_role_arn(account_id: str, region: str, role_name: str): + return f'arn:{partition}:iam::{account_id}:role/{const.SOLUTION_NAME}{role_name}-{region}' diff --git a/source/constructs/api/data_source/main.py b/source/constructs/api/data_source/main.py index 3049c469..ae60cba4 100644 --- a/source/constructs/api/data_source/main.py +++ b/source/constructs/api/data_source/main.py @@ -7,7 +7,7 @@ from common.query_condition import QueryCondition from common.request_wrapper import inject_session from common.response_wrapper import BaseResponse -from . import crud, schemas, service +from . 
import crud, schemas, service, jdbc_schema router = APIRouter(prefix="/data-source", tags=["data-source"]) @@ -383,7 +383,7 @@ def query_connection_detail(account: schemas.JDBCInstanceSourceBase): @router.post("/jdbc-databases", response_model=BaseResponse[list[str]]) @inject_session def list_jdbc_databases(source: schemas.JdbcSource): - return service.list_jdbc_databases(source) + return jdbc_schema.list_jdbc_databases(source) @router.post("/batch-create", response_model=BaseResponse) @inject_session diff --git a/source/constructs/api/data_source/service.py b/source/constructs/api/data_source/service.py index 69397ed5..d8fdf69c 100644 --- a/source/constructs/api/data_source/service.py +++ b/source/constructs/api/data_source/service.py @@ -11,7 +11,6 @@ import boto3 from fastapi import File, UploadFile import openpyxl -import pandas as pd import pymysql from botocore.exceptions import ClientError @@ -32,7 +31,7 @@ from discovery_job.service import can_delete_database as can_delete_job_database from discovery_job.service import delete_account as delete_job_by_account from discovery_job.service import delete_database as delete_job_database -from . import s3_detector, rds_detector, glue_database_detector, jdbc_detector, crud, jdbc_database +from . import s3_detector, rds_detector, glue_database_detector, jdbc_detector, crud from .schemas import (AccountInfo, AdminAccountInfo, JDBCInstanceSource, JDBCInstanceSourceUpdate, ProviderResourceFullInfo, SourceNewAccount, SourceRegion, @@ -42,8 +41,7 @@ JDBCInstanceSourceUpdateBase, DataLocationInfo, JDBCInstanceSourceBase, - JDBCInstanceSourceFullInfo, - JdbcSource) + JDBCInstanceSourceFullInfo) from common.reference_parameter import logger, admin_account_id, admin_region, partition, admin_bucket_name SLEEP_TIME = 5 @@ -2610,27 +2608,6 @@ def __get_glue_client(account, region): ) return glue -def list_jdbc_databases(source: JdbcSource) -> list[str]: - url_arr = source.connection_url.split(":") - if len(url_arr) != 4: - raise BizException(MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_code(), MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_msg()) - if url_arr[1] != "mysql": - raise BizException(MessageEnum.SOURCE_JDBC_LIST_DATABASES_NOT_SUPPORTED.get_code(), MessageEnum.SOURCE_JDBC_LIST_DATABASES_NOT_SUPPORTED.get_msg()) - host = url_arr[2][2:] - port = int(url_arr[3].split("/")[0]) - user = source.username - password = source.password - if source.secret_id: - secrets_client = boto3.client('secretsmanager') - secret_response = secrets_client.get_secret_value(SecretId=source.secret_id) - secrets = json.loads(secret_response['SecretString']) - user = secrets['username'] - password = secrets['password'] - mysql_database = jdbc_database.MySQLDatabase(host, port, user, password) - databases = mysql_database.list_databases() - logger.info(databases) - return databases - def batch_create(file: UploadFile = File(...)): time_str = time.time() @@ -2708,67 +2685,6 @@ async def batch_create_jdbc(jdbc_list): await asyncio.gather(*tasks) -def get_schema_by_snapshot(provider_id: int, account_id: str, instance: str, region: str): - res = crud.get_schema_by_snapshot(provider_id, account_id, instance, region) - return res[0][0].split('\n') if res else None, res[0][1] if res else None - -def get_schema_by_real_time(provider_id: int, account_id: str, instance: str, region: str, db_info: bool = False): - db, subnet_id = None, None - assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, 
instance_id=instance, region=region)) - connection_rds = crud.get_connection_by_instance(provider_id, account_id, instance, region) - glue = __get_glue_client(assume_account, assume_region) - connection = glue.get_connection(Name=connection_rds[0][0]).get('Connection', {}) - if connection_rds[0] and connection_rds[0][0]: - subnet_id = connection.get('PhysicalConnectionRequirements', {}).get('SubnetId') - if db_info: - connection_properties = connection.get("ConnectionProperties", {}) - jdbc_source = JdbcSource(username=connection_properties.get("USERNAME"), - password=connection_properties.get("PASSWORD"), - secret_id=connection_properties.get("SECRET_ID"), - connection_url=connection_properties.get("JDBC_CONNECTION_URL") - ) - db = list_jdbc_databases(jdbc_source) - return db, subnet_id - -def sync_schema_by_job(provider_id: int, account_id: str, instance: str, region: str, schema: str): - jdbc_targets = [] - # Query Info - info = crud.get_crawler_glueDB_by_instance(provider_id, account_id, instance, region) - if not info: - return - crawler_role_arn = __gen_role_arn(account_id=account_id, - region=region, - role_name='GlueDetectionJobRole') - db_names = schema.split("\n") - for db_name in db_names: - trimmed_db_name = db_name.strip() - if trimmed_db_name: - jdbc_targets.append({ - 'ConnectionName': info[0][2], - 'Path': f"{trimmed_db_name}/%" - }) - # Update Crawler - assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) - try: - __get_glue_client(assume_account, assume_region).update_crawler( - Name=info[0], - Role=crawler_role_arn, - DatabaseName=info[1], - Targets={ - 'JdbcTargets': jdbc_targets, - }, - SchemaChangePolicy={ - 'UpdateBehavior': 'UPDATE_IN_DATABASE', - 'DeleteBehavior': 'DELETE_FROM_DATABASE' - } - ) - except Exception as e: - logger.error(traceback.format_exc()) - raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), - MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) - # Update RDS - crud.update_schema_by_account(provider_id, account_id, instance, region, schema) - def __get_admin_info(jdbc): account_id = jdbc.account_id if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id region = jdbc.region if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_region diff --git a/source/constructs/api/discovery_job/crud.py b/source/constructs/api/discovery_job/crud.py index 9da241e5..16f90c99 100644 --- a/source/constructs/api/discovery_job/crud.py +++ b/source/constructs/api/discovery_job/crud.py @@ -9,7 +9,7 @@ from sqlalchemy import func from common.constant import const import uuid -import datetime +from datetime import datetime, timedelta from catalog.crud import get_catalog_database_level_classification_by_type_all,get_catalog_database_level_classification_by_params from template.service import get_template_snapshot_no from tools.str_tool import is_empty @@ -171,7 +171,7 @@ def init_run(job_id: int) -> int: catalog_databases = get_catalog_database_level_classification_by_params(job_database.account_id,job_database.region,job_database.database_type).all() for catalog_database in catalog_databases: base_time = base_time_dict.get( - f'{job_database.account_id}-{job_database.region}-{job_database.database_type}-{catalog_database.database_name}', datetime.datetime.min) + f'{job_database.account_id}-{job_database.region}-{job_database.database_type}-{catalog_database.database_name}', datetime.min) run_database = 
models.DiscoveryJobRunDatabase(run_id=run.id, account_id=job_database.account_id, region=job_database.region, @@ -293,7 +293,7 @@ def get_run_database(run_database_id: int) -> models.DiscoveryJobRunDatabase: return session.query(models.DiscoveryJobRunDatabase).get(run_database_id) -def update_job_database_base_time(job_id: int, account_id: str, region: str, database_type: str, database_name: str, base_time: datetime.datetime): +def update_job_database_base_time(job_id: int, account_id: str, region: str, database_type: str, database_name: str, base_time: datetime): session = get_session() job_database = schemas.DiscoveryJobDatabaseBaseTime(base_time=base_time) session.query(models.DiscoveryJobDatabase).filter(models.DiscoveryJobDatabase.job_id == job_id, @@ -304,10 +304,17 @@ def update_job_database_base_time(job_id: int, account_id: str, region: str, dat session.commit() -def get_running_run_databases() -> list[models.DiscoveryJobRunDatabase]: +def get_run_databases_by_state(state: RunDatabaseState) -> list[models.DiscoveryJobRunDatabase]: session = get_session() - db_run_databases = session.query(models.DiscoveryJobRunDatabase).filter(models.DiscoveryJobRunDatabase.state == RunDatabaseState.RUNNING.value).all() - return db_run_databases + return session.query(models.DiscoveryJobRunDatabase).filter(models.DiscoveryJobRunDatabase.state == state.value).all() + + +def get_running_run_databases() -> list[models.DiscoveryJobRunDatabase]: + return get_run_databases_by_state(RunDatabaseState.RUNNING) + + +def get_pending_run_databases() -> list[models.DiscoveryJobRunDatabase]: + return get_run_databases_by_state(RunDatabaseState.PENDING) def count_account_run_job(account_id: str, regin: str): diff --git a/source/constructs/api/discovery_job/service.py b/source/constructs/api/discovery_job/service.py index c6c4862d..3d820ac0 100644 --- a/source/constructs/api/discovery_job/service.py +++ b/source/constructs/api/discovery_job/service.py @@ -7,7 +7,7 @@ from common.enum import MessageEnum, JobState, RunState, RunDatabaseState, DatabaseType, AthenaQueryState from common.constant import const from common.query_condition import QueryCondition -from common.reference_parameter import logger, admin_account_id, admin_region, admin_bucket_name, partition, url_suffix, public_account_id +from common.reference_parameter import logger, admin_account_id, admin_region, admin_bucket_name, partition, url_suffix, public_account_id, admin_subnet_ids import traceback import tools.mytime as mytime import datetime, time, pytz @@ -15,7 +15,10 @@ from tempfile import NamedTemporaryFile from catalog.service import sync_job_detection_result from tools.str_tool import is_empty -from common.abilities import need_change_account_id +from common.abilities import need_change_account_id, convert_database_type_2_provider +import config.service as config_service +from data_source import jdbc_schema +from tools import list_tool version = os.getenv(const.VERSION, '') controller_function_name = os.getenv("ControllerFunctionName", f"{const.SOLUTION_NAME}-Controller") @@ -75,7 +78,7 @@ def create_event(job_id: int, schedule: str): ], ) - input = {"JobId": job_id} + input = {const.CONTROLLER_ACTION: const.CONTROLLER_ACTION_SCHEDULE_JOB, "JobId": job_id} response = client_events.put_targets( Rule=rule_name, Targets=[ @@ -195,7 +198,16 @@ def disable_job(id: int): def start_job(job_id: int): run_id = crud.init_run(job_id) if run_id >= 0: - __start_run(job_id, run_id) + run = crud.get_run(run_id) + if not run.databases: + 
crud.complete_run(run_id) + raise BizException(MessageEnum.DISCOVERY_JOB_DATABASE_IS_EMPTY.get_code(), + MessageEnum.DISCOVERY_JOB_DATABASE_IS_EMPTY.get_msg()) + failed_run_database_count = __start_run_databases(run.databases) + if failed_run_database_count == len(run.databases): + crud.complete_run(run_id) + raise BizException(MessageEnum.DISCOVERY_JOB_ALL_RUN_FAILED.get_code(), + MessageEnum.DISCOVERY_JOB_ALL_RUN_FAILED.get_msg()) def start_sample_job(job_id: int, table_name: str): @@ -205,14 +217,36 @@ def start_sample_job(job_id: int, table_name: str): __start_sample_run(job_id, run_id, table_name) -def __start_run(job_id: int, run_id: int): - job = crud.get_job(job_id) - run = crud.get_run(run_id) - run_databases = run.databases - if not run_databases: - crud.complete_run(run_id) - raise BizException(MessageEnum.DISCOVERY_JOB_DATABASE_IS_EMPTY.get_code(), - MessageEnum.DISCOVERY_JOB_DATABASE_IS_EMPTY.get_msg()) +def __get_job_number(database_type: str) -> int: + if database_type in [DatabaseType.S3.value, DatabaseType.GLUE.value]: + return int(config_service.get_config(const.CONFIG_JOB_NUMBER_S3, const.CONFIG_JOB_NUMBER_S3_DEFAULT_VALUE)) + return int(config_service.get_config(const.CONFIG_JOB_NUMBER_RDS, const.CONFIG_JOB_NUMBER_RDS_DEFAULT_VALUE)) + + +def get_run_database_ip_count(database_type: str) -> int: + crawler_ip = 0 + if database_type.startswith(DatabaseType.JDBC.value): + crawler_ip = 3 + return crawler_ip + __get_job_number(database_type) * 2 # Each GlueJob requires 2 IPs + + +def __count_run_database_by_subnet() -> dict: + count_run_database = {} + run_databases = crud.get_running_run_databases() + for run_database in run_databases: + if not need_change_account_id(run_database.database_type): + continue + provider_id = convert_database_type_2_provider(run_database.database_type) + _, subnet_id = jdbc_schema.get_schema_by_real_time(provider_id, run_database.account_id, run_database.region, run_database.database_name) + count = count_run_database.get(subnet_id, 0) + count_run_database[subnet_id] = count + 1 + logger.info(f"count_run_database:{count_run_database}") + return count_run_database + + +def __start_run_databases(run_databases): + job_dic = {} + run_dic = {} module_path = f's3://{admin_bucket_name}/job/ml-asset/python-module/' wheels = ["humanfriendly-10.0-py2.py3-none-any.whl", "protobuf-4.22.1-cp37-abi3-manylinux2014_x86_64.whl", @@ -221,12 +255,14 @@ def __start_run(job_id: int, run_id: int): "onnxruntime-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", "sdpsner-1.0.0-py3-none-any.whl", ] + limit_concurrency = False account_loop_wait = {} for run_database in run_databases: account_id = run_database.account_id if need_change_account_id(run_database.database_type): account_id = admin_account_id + limit_concurrency = True if account_id in account_loop_wait: tmp = account_loop_wait[account_id] tmp = tmp + const.JOB_INTERVAL_WAIT @@ -234,11 +270,42 @@ def __start_run(job_id: int, run_id: int): else: account_loop_wait[account_id] = const.JOB_INTERVAL_WAIT + if limit_concurrency: + concurrent_run_instance_number = config_service.get_config(const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER, const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE) + count_run_database = __count_run_database_by_subnet() + job_placeholder = "," account_first_wait = {} - failed_run_count = 0 + failed_run_database_count = 0 for run_database in run_databases: try: + if limit_concurrency: + provider_id = convert_database_type_2_provider(run_database.database_type) + 
database_schemas_real_time, subnet_id = jdbc_schema.get_schema_by_real_time(provider_id, run_database.account_id, run_database.region, run_database.database_name, True) + count = count_run_database.get(subnet_id, 0) + if count >= concurrent_run_instance_number: + run_database.state = RunDatabaseState.PENDING.value + continue + count_run_database[subnet_id] = count + 1 + if database_schemas_real_time: + database_schemas_snapshot, _ = jdbc_schema.get_schema_by_snapshot(provider_id, run_database.account_id, run_database.region, run_database.database_name) + logger.info(f'database_schemas_real_time:{database_schemas_real_time}') + logger.info(f'database_schemas_snapshot:{database_schemas_snapshot}') + if not list_tool.compare(database_schemas_real_time, database_schemas_snapshot): + jdbc_schema.sync_schema_by_job(provider_id, run_database.account_id, run_database.region, run_database.database_name, database_schemas_real_time) + logger.info(f'Updated schema:{database_schemas_real_time}') + else: + logger.info(f'Unable to obtain the schema for {run_database.database_name}') + + run = run_dic.get(run_database.run_id) + if not run: + run = crud.get_run(run_database.run_id) + run_dic[run_database.run_id] = run + job = job_dic.get(run.job_id) + if not job: + job = crud.get_job(run.job_id) + job_dic[run.job_id] = job + account_id = run_database.account_id region = run_database.region if need_change_account_id(run_database.database_type): @@ -263,7 +330,7 @@ def __start_run(job_id: int, run_id: int): glue_database_name = run_database.database_name job_name_structured = f"{const.SOLUTION_NAME}-{run_database.database_type}-{run_database.database_name}" job_name_unstructured = f"{const.SOLUTION_NAME}-{DatabaseType.S3_UNSTRUCTURED.value}-{run_database.database_name}" - run_name = f'{const.SOLUTION_NAME}-{run_id}-{run_database.id}-{run_database.uuid}' + run_name = f'{const.SOLUTION_NAME}-{run_database.run_id}-{run_database.id}-{run_database.uuid}' # agent_bucket_name = f"{const.AGENT_BUCKET_NAME_PREFIX}-{run_database.account_id}-{run_database.region}" unstructured_parser_job_image_uri = f"{public_account_id}.dkr.ecr.{run_database.region}.amazonaws.com{url_suffix}/aws-sensitive-data-protection-models:v1.1.0" unstructured_parser_job_role = f"arn:{partition}:iam::{run_database.account_id}:role/{const.SOLUTION_NAME}UnstructuredParserRole-{run_database.region}" @@ -274,7 +341,7 @@ def __start_run(job_id: int, run_id: int): "NeedRunCrawler": need_run_crawler, "CrawlerName": crawler_name, "JobId": str(job.id), # When calling Glue Job using StepFunction, the parameter must be of string type - "RunId": str(run_id), + "RunId": str(run_database.run_id), "RunDatabaseId": str(run_database.id), "AccountId": run_database.account_id, # The original account id is required here "Region": run_database.region, # The original region is required here @@ -302,6 +369,7 @@ def __start_run(job_id: int, run_id: int): "ExtraPyFiles": extra_py_files, "FirstWait": str(account_first_wait[account_id]), "LoopWait": str(account_loop_wait[account_id]), + "JobNumber": __get_job_number(run_database.database_type), "QueueUrl": f'https://sqs.{region}.amazonaws.com{url_suffix}/{admin_account_id}/{const.SOLUTION_NAME}-DiscoveryJob', "UnstructuredParserJobImageUri": unstructured_parser_job_image_uri, "UnstructuredParserJobRole": unstructured_parser_job_role, @@ -313,17 +381,14 @@ def __start_run(job_id: int, run_id: int): __exec_run(execution_input) run_database.state = RunDatabaseState.RUNNING.value except Exception: - failed_run_count += 1 + 
failed_run_database_count += 1 msg = traceback.format_exc() run_database.state = RunDatabaseState.FAILED.value run_database.end_time = mytime.get_time() run_database.error_log = msg logger.exception("Run StepFunction exception:%s" % msg) crud.save_run_databases(run_databases) - if failed_run_count == len(run_databases): - crud.complete_run(run_id) - raise BizException(MessageEnum.DISCOVERY_JOB_ALL_RUN_FAILED.get_code(), - MessageEnum.DISCOVERY_JOB_ALL_RUN_FAILED.get_msg()) + return failed_run_database_count def __start_sample_run(job_id: int, run_id: int, table_name: str): @@ -431,10 +496,11 @@ def __check_sfn_version(client_sfn, arn, account_id): logger.info(f"{account_id} version is:{agent_version}") # Only check if the solution version is consistent. # Do not determine if the build version is consistent - agent_solution_version = agent_version.split('-')[0] - if not version.startswith(agent_solution_version): - raise BizException(MessageEnum.DISCOVERY_JOB_AGENT_MISMATCHING_VERSION.get_code(), - MessageEnum.DISCOVERY_JOB_AGENT_MISMATCHING_VERSION.get_msg()) + if os.getenv(const.MODE) != const.MODE_DEV: + agent_solution_version = agent_version.split('-')[0] + if not version.startswith(agent_solution_version): + raise BizException(MessageEnum.DISCOVERY_JOB_AGENT_MISMATCHING_VERSION.get_code(), + MessageEnum.DISCOVERY_JOB_AGENT_MISMATCHING_VERSION.get_msg()) def __exec_run(execution_input): @@ -765,7 +831,13 @@ def complete_run_database(input_event): logger.info(f'complete_run_database,JobId:{input_event["JobId"]},RunId:{input_event["RunId"]},DatabaseName:{input_event["DatabaseName"]}') -def check_running_run(): +def check_pending_run_databases(): + run_databases = crud.get_pending_run_databases() + if run_databases: + __start_run_databases(run_databases) + + +def check_running_run_databases(): run_databases = crud.get_running_run_databases() for run_database in run_databases: run_database_state, stop_time = __get_run_database_state_from_agent(run_database) diff --git a/source/constructs/api/lambda/auto_sync_data.py b/source/constructs/api/lambda/auto_sync_data.py index ae8cf581..28acf856 100644 --- a/source/constructs/api/lambda/auto_sync_data.py +++ b/source/constructs/api/lambda/auto_sync_data.py @@ -1,15 +1,12 @@ -import json import logging import time import boto3 from common.enum import AutoSyncDataAction, Provider from data_source.service import delete_account -from db.database import close_session, gen_session from common.reference_parameter import logger, admin_region, partition from botocore.exceptions import ClientError from common.constant import const -logger.setLevel(logging.INFO) client_sts = boto3.client('sts') @@ -37,14 +34,3 @@ def sync_data(input_event): else: break delete_account(Provider.AWS_CLOUD.value, agent_account_id, None) - - -def lambda_handler(event, context): - try: - gen_session() - for record in event['Records']: - payload = record["body"] - logger.info(payload) - sync_data(json.loads(payload)) - finally: - close_session() diff --git a/source/constructs/api/lambda/check_run.py b/source/constructs/api/lambda/check_run.py deleted file mode 100644 index 10a09ef5..00000000 --- a/source/constructs/api/lambda/check_run.py +++ /dev/null @@ -1,14 +0,0 @@ -import discovery_job.service as service -from db.database import gen_session, close_session -import logging -from common.reference_parameter import logger - -logger.setLevel(logging.INFO) - - -def lambda_handler(event, context): - try: - gen_session() - service.check_running_run() - finally: - close_session() diff 
--git a/source/constructs/api/lambda/controller.py b/source/constructs/api/lambda/controller.py index 015474d0..590a068c 100644 --- a/source/constructs/api/lambda/controller.py +++ b/source/constructs/api/lambda/controller.py @@ -1,16 +1,59 @@ -import discovery_job.service as service +import json +import discovery_job.service as discovery_job_service +import data_source.service as data_source_service from db.database import gen_session, close_session import logging from common.reference_parameter import logger +from common.constant import const +from . import auto_sync_data, sync_crawler_results logger.setLevel(logging.INFO) def lambda_handler(event, context): try: + logger.info(event) gen_session() - job_id = event["JobId"] - logger.info(f'JobId:{job_id}') - service.start_job(job_id) + if not event: + return + if "Records" in event: + __deal_queue(event) + return + # In the old version, the only parameter for scheduled job was JobId + if "JobId" in event and len(event) == 1: + __schedule_job(event) + controller_action = event[const.CONTROLLER_ACTION] + if not controller_action: + return + if controller_action == const.CONTROLLER_ACTION_SCHEDULE_JOB: + __schedule_job(event) + elif controller_action == const.CONTROLLER_ACTION_CHECK_RUNNING_RUN_DATABASES: + discovery_job_service.check_running_run_databases() + elif controller_action == const.CONTROLLER_ACTION_CHECK_PENDING_RUN_DATABASES: + discovery_job_service.check_pending_run_databases() + elif controller_action == const.CONTROLLER_ACTION_REFRESH_ACCOUNT: + data_source_service.refresh_account() + else: + logger.error("Unknown action") finally: close_session() + + +def __schedule_job(event): + discovery_job_service.start_job(event["JobId"]) + + +def __deal_queue(event): + event_source = event['Records'][0]["eventSourceARN"].split(":")[-1] + for record in event['Records']: + payload = record["body"] + logger.info(payload) + payload = payload.replace("\'", "\"") + current_event = json.loads(payload) + if event_source == f"{const.SOLUTION_NAME}-DiscoveryJob": + discovery_job_service.complete_run_database(current_event) + discovery_job_service.change_run_state(int(current_event["RunId"])) + elif event_source == f"{const.SOLUTION_NAME}-AutoSyncData": + auto_sync_data.sync_data(current_event) + elif event_source == f"{const.SOLUTION_NAME}-Crawler": + sync_crawler_results.sync_result(current_event) diff --git a/source/constructs/api/lambda/receive_job_info.py b/source/constructs/api/lambda/receive_job_info.py deleted file mode 100644 index d015537f..00000000 --- a/source/constructs/api/lambda/receive_job_info.py +++ /dev/null @@ -1,23 +0,0 @@ -import discovery_job.service as service -from db.database import gen_session, close_session -import json -import logging -from common.reference_parameter import logger - -logger.setLevel(logging.INFO) - - -def main(input_event): - service.complete_run_database(input_event) - service.change_run_state(int(input_event["RunId"])) - - -def lambda_handler(event, context): - try: - gen_session() - for record in event['Records']: - payload = record["body"] - logger.info(payload) - main(json.loads(payload)) - finally: - close_session() diff --git a/source/constructs/api/lambda/refresh_account.py b/source/constructs/api/lambda/refresh_account.py deleted file mode 100644 index 9e241a1b..00000000 --- a/source/constructs/api/lambda/refresh_account.py +++ /dev/null @@ -1,15 +0,0 @@ -import data_source.service as service -from db.database import gen_session, close_session -import logging -from common.reference_parameter 
import logger - -logger.setLevel(logging.INFO) - - -def lambda_handler(event, context): - try: - logger.info(event) - gen_session() - service.refresh_account() - finally: - close_session() diff --git a/source/constructs/api/lambda/sync_crawler_results.py b/source/constructs/api/lambda/sync_crawler_results.py index 1fb6da99..0002ec96 100644 --- a/source/constructs/api/lambda/sync_crawler_results.py +++ b/source/constructs/api/lambda/sync_crawler_results.py @@ -1,18 +1,12 @@ -import json -import logging -import re import traceback - import catalog.service as catalog_service import data_source.crud as data_source_crud from common.abilities import convert_database_type_2_provider from common.abilities import need_change_account_id from common.constant import const from common.enum import DatabaseType, ConnectionState -from db.database import gen_session, close_session from common.reference_parameter import logger -logger.setLevel(logging.INFO) crawler_prefixes = const.SOLUTION_NAME + "-" @@ -114,18 +108,3 @@ def sync_result(input_event): state=state ) logger.debug("update jdbc datasource finished") - - -def lambda_handler(event, context): - try: - gen_session() - for record in event['Records']: - payload = record["body"] - logger.info(payload) - updated_string = re.sub(r'("[^"]*?)(\'.*?\')([^"]*?")', r'\1--\3', str(payload)) - payload = updated_string.replace("\'", "\"") - sync_result(json.loads(payload)) - except Exception: - logger.error(traceback.format_exc()) - finally: - close_session() diff --git a/source/constructs/api/main.py b/source/constructs/api/main.py index ad4aa0e2..493b842b 100644 --- a/source/constructs/api/main.py +++ b/source/constructs/api/main.py @@ -16,8 +16,9 @@ from common.constant import const from template.main import router as template_router from version.main import router as version_router -from common.exception_handler import biz_exception from label.main import router as label_router +from config.main import router as config_router +from common.exception_handler import biz_exception from fastapi_pagination import add_pagination logging.config.fileConfig('logging.conf', disable_existing_loggers=False) @@ -169,6 +170,7 @@ def __online_validate(token, jwt_claims): return False +app.include_router(config_router) app.include_router(discovery_router) app.include_router(data_source_router) app.include_router(catalog_router) diff --git a/source/constructs/api/tools/list_tool.py b/source/constructs/api/tools/list_tool.py new file mode 100644 index 00000000..da877b50 --- /dev/null +++ b/source/constructs/api/tools/list_tool.py @@ -0,0 +1,4 @@ +def compare(list1: list, list2: list): + sorted_list1 = sorted(list1) + sorted_list2 = sorted(list2) + return sorted_list1 == sorted_list2 diff --git a/source/constructs/lib/admin-stack.ts b/source/constructs/lib/admin-stack.ts index 90d9608c..248494e5 100755 --- a/source/constructs/lib/admin-stack.ts +++ b/source/constructs/lib/admin-stack.ts @@ -209,6 +209,7 @@ export class AdminStack extends Stack { vpc: vpcStack.vpc, bucketName: bucketStack.bucket.bucketName, rdsClientSecurityGroup: rdsStack.clientSecurityGroup, + customDBSecurityGroup: vpcStack.customDBSecurityGroup, oidcIssuer: oidcIssuerValue, oidcClientId: oidcClientIdValue, }); diff --git a/source/constructs/lib/admin/alb-stack.ts b/source/constructs/lib/admin/alb-stack.ts index a21c5ea1..7d93f3ef 100644 --- a/source/constructs/lib/admin/alb-stack.ts +++ b/source/constructs/lib/admin/alb-stack.ts @@ -225,6 +225,7 @@ export class AlbStack extends NestedStack { }); 
albSecurityGroup.addIngressRule(Peer.anyIpv4(), Port.tcp(port), 'rule of allow inbound traffic from server port'); albSecurityGroup.addIngressRule(Peer.anyIpv6(), Port.tcp(port), 'rule of allow inbound traffic from server port'); + Tags.of(albSecurityGroup).add(SolutionInfo.TAG_NAME, `${SolutionInfo.SOLUTION_NAME}-ALB`); return albSecurityGroup; } diff --git a/source/constructs/lib/admin/api-stack.ts b/source/constructs/lib/admin/api-stack.ts index 1d8766e7..dfa4d8a4 100644 --- a/source/constructs/lib/admin/api-stack.ts +++ b/source/constructs/lib/admin/api-stack.ts @@ -36,7 +36,6 @@ import { Code, AssetCode, LayerVersion, - FunctionOptions, } from 'aws-cdk-lib/aws-lambda'; import { SqsEventSource } from 'aws-cdk-lib/aws-lambda-event-sources'; import { Construct } from 'constructs'; @@ -48,6 +47,7 @@ export interface ApiProps { readonly vpc: IVpc; readonly bucketName: string; readonly rdsClientSecurityGroup: SecurityGroup; + readonly customDBSecurityGroup: SecurityGroup; readonly oidcIssuer: string; readonly oidcClientId: string; } @@ -66,35 +66,38 @@ export class ApiStack extends Construct { this.apiLayer = this.createLayer(); this.code = Code.fromAsset(path.join(__dirname, '../../api'), { exclude: ['venv', 'pytest'] }); - this.createFunction('Controller', 'lambda.controller.lambda_handler', props, 20, `${SolutionInfo.SOLUTION_NAME}-Controller`); - this.apiFunction = this.createFunction('API', 'main.handler', props, 900); - const checkRunFunction = this.createFunction('CheckRun', 'lambda.check_run.lambda_handler', props, 600); - const checkRunRule = new events.Rule(this, 'CheckRunRule', { + const controllerFunction = this.createFunction('Controller', 'lambda.controller.lambda_handler', props, 900, `${SolutionInfo.SOLUTION_NAME}-Controller`); + + const checkRunningRule = new events.Rule(this, 'CheckRunningRule', { // ruleName: `${SolutionInfo.SOLUTION_NAME}-CheckRun`, schedule: events.Schedule.cron({ minute: '0/30' }), }); - checkRunRule.addTarget(new targets.LambdaFunction(checkRunFunction)); - Tags.of(checkRunRule).add(SolutionInfo.TAG_KEY, SolutionInfo.TAG_VALUE); + checkRunningRule.addTarget(new targets.LambdaFunction(controllerFunction, { + event: events.RuleTargetInput.fromObject({ Action: 'CheckRunningRunDatabases' }), + })); + // Tags.of(checkRunningRule).add(SolutionInfo.TAG_KEY, SolutionInfo.TAG_VALUE); + const checkPendingRule = new events.Rule(this, 'CheckPendingRule', { + // ruleName: `${SolutionInfo.SOLUTION_NAME}-CheckPending`, + schedule: events.Schedule.rate(Duration.minutes(1)), + }); + checkPendingRule.addTarget(new targets.LambdaFunction(controllerFunction, { + event: events.RuleTargetInput.fromObject({ Action: 'CheckPendingRunDatabases' }), + })); + // Tags.of(checkPendingRule).add(SolutionInfo.TAG_KEY, SolutionInfo.TAG_VALUE); - const receiveJobInfoFunction = this.createFunction('ReceiveJobInfo', 'lambda.receive_job_info.lambda_handler', props, 900); const discoveryJobSqsStack = new SqsStack(this, 'DiscoveryJobQueue', { name: 'DiscoveryJob', visibilityTimeout: 900 }); const discoveryJobEventSource = new SqsEventSource(discoveryJobSqsStack.queue); - receiveJobInfoFunction.addEventSource(discoveryJobEventSource); + controllerFunction.addEventSource(discoveryJobEventSource); - const updateCatalogFunction = this.createFunction('UpdateCatalog', 'lambda.sync_crawler_results.lambda_handler', props, 900); const crawlerSqsStack = new SqsStack(this, 'CrawlerQueue', { name: 'Crawler', visibilityTimeout: 900 }); const crawlerEventSource = new 
SqsEventSource(crawlerSqsStack.queue); - updateCatalogFunction.addEventSource(crawlerEventSource); + controllerFunction.addEventSource(crawlerEventSource); - const autoSyncDataFunction = this.createFunction('AutoSyncData', 'lambda.auto_sync_data.lambda_handler', props, 900); - // Set delivery delay to 10 minutes to wait for agent stack to be deleted const autoSyncDataSqsStack = new SqsStack(this, 'AutoSyncDataQueue', { name: 'AutoSyncData', visibilityTimeout: 900 }); const autoSyncDataEventSource = new SqsEventSource(autoSyncDataSqsStack.queue); - autoSyncDataFunction.addEventSource(autoSyncDataEventSource); - - this.createFunction('RefreshAccount', 'lambda.refresh_account.lambda_handler', props, 60, `${SolutionInfo.SOLUTION_NAME}-RefreshAccount`); + controllerFunction.addEventSource(autoSyncDataEventSource); } private createFunction(name: string, handler: string, props: ApiProps, timeout?: number, functionName?: string) { @@ -111,6 +114,7 @@ export class ApiStack extends Construct { vpcSubnets: props.vpc.selectSubnets({ subnetType: SubnetType.PRIVATE_WITH_EGRESS, }), + // securityGroups: [props.rdsClientSecurityGroup, props.customDBSecurityGroup], securityGroups: [props.rdsClientSecurityGroup], environment: { AdminBucketName: props.bucketName, diff --git a/source/constructs/lib/admin/delete-resources/delete_resources.py b/source/constructs/lib/admin/delete-resources/delete_resources.py index 687e9f23..52bf28db 100644 --- a/source/constructs/lib/admin/delete-resources/delete_resources.py +++ b/source/constructs/lib/admin/delete-resources/delete_resources.py @@ -72,9 +72,9 @@ def on_delete(event): def refresh_account(event): - response = lambda_client.invoke(FunctionName=f'{solution_name}-RefreshAccount', - Payload='{"UpdateEvent":"RerefshAccount"}') - print(response) + response = lambda_client.invoke(FunctionName=f'{solution_name}-Controller', + Payload='{"Action":"RefreshAccount"}') + logger.info(response) def __do_delete_rule(rule_name): diff --git a/source/constructs/lib/admin/rds-stack.ts b/source/constructs/lib/admin/rds-stack.ts index fe99ae7a..101232b9 100644 --- a/source/constructs/lib/admin/rds-stack.ts +++ b/source/constructs/lib/admin/rds-stack.ts @@ -15,6 +15,7 @@ import * as path from 'path'; import { CustomResource, Duration, RemovalPolicy, + Tags, } from 'aws-cdk-lib'; import { InstanceClass, @@ -66,6 +67,7 @@ export class RdsStack extends Construct { vpc: props.vpc, description: 'connet to RDS', }); + Tags.of(this.clientSecurityGroup).add(SolutionInfo.TAG_NAME, `${SolutionInfo.SOLUTION_NAME}-RDS Client`); const rdsSecurityGroup = new SecurityGroup(this, 'RDSSecurityGroup', { // securityGroupName: 'RDS', vpc: props.vpc, @@ -76,6 +78,7 @@ export class RdsStack extends Construct { Port.tcp(this.dbPort), 'Allow RDS client', ); + Tags.of(rdsSecurityGroup).add(SolutionInfo.TAG_NAME, `${SolutionInfo.SOLUTION_NAME}-RDS`); const secretName = `${SolutionInfo.SOLUTION_NAME}`; const dbSecret = new DatabaseSecret(this, 'Secret', { @@ -113,7 +116,7 @@ export class RdsStack extends Construct { removalPolicy: RemovalPolicy.DESTROY, multiAz: true, deletionProtection: false, - caCertificate: CaCertificate.RDS_CA_RDS4096_G1, + caCertificate: CaCertificate.RDS_CA_RDS2048_G1, }); new SecretRotation(this, 'SecretRotation', { diff --git a/source/constructs/lib/admin/vpc-stack.ts b/source/constructs/lib/admin/vpc-stack.ts index c6d44920..4a1e7e4e 100644 --- a/source/constructs/lib/admin/vpc-stack.ts +++ b/source/constructs/lib/admin/vpc-stack.ts @@ -68,6 +68,7 @@ export class VpcStack extends 
Construct { public publicSubnet2 = ''; public privateSubnet1 = ''; public privateSubnet2 = ''; + readonly customDBSecurityGroup: SecurityGroup; constructor(scope: Construct, id: string, props?: VpcProps) { super(scope, id); @@ -78,14 +79,15 @@ export class VpcStack extends Construct { this.createVpc(props); } // Create CustomDB Security Group - const securityGroup = new SecurityGroup(this, 'CustomDBSecurityGroup', { + this.customDBSecurityGroup = new SecurityGroup(this, 'CustomDBSecurityGroup', { vpc: this.vpc, securityGroupName: 'SDPS-CustomDB', description: 'Allow all TCP ingress traffic', }); // Allow ingress on all TCP ports from the same security group - securityGroup.addIngressRule(securityGroup, Port.allTcp()); + this.customDBSecurityGroup.addIngressRule(this.customDBSecurityGroup, Port.allTcp()); + Tags.of(this.customDBSecurityGroup).add(SolutionInfo.TAG_NAME, `${SolutionInfo.SOLUTION_NAME}-ConnectToCustomDatabase`); } private createVpc(props?: VpcProps) { diff --git a/source/constructs/lib/agent/split-job/split_job.py b/source/constructs/lib/agent/split-job/split_job.py index 8ea9096e..37c902eb 100644 --- a/source/constructs/lib/agent/split-job/split_job.py +++ b/source/constructs/lib/agent/split-job/split_job.py @@ -34,7 +34,7 @@ def divide_and_round_up(a, b): def get_job_number(event): if "JobNumber" in event: return event["JobNumber"] - if event['DatabaseType'] == "s3" or event['DatabaseType'] == "glue": + if event['DatabaseType'] in ["s3","glue"]: return 10 return 3 From dafc45093ab0ec5ef097d391e1daf116ae579a93 Mon Sep 17 00:00:00 2001 From: junzhong Date: Tue, 23 Jan 2024 11:07:36 +0800 Subject: [PATCH 005/112] feat(cdk): add batch create jdbc datasource template --- .../template/batch-create-jdbc-datasource.xlsx | Bin 0 -> 10033 bytes source/constructs/lib/admin/glue-stack.ts | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 source/constructs/config/batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx diff --git a/source/constructs/config/batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx b/source/constructs/config/batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..af339acdae7e058880b21890d74b71b6498ca940 GIT binary patch literal 10033 zcmeHN1yh{K)*gIF1cN1QawT02TlQ006`Qz2j69EeHT05*h%&001G?g{-X{46Pir z6+hb;+H1abv9ut{f`+8Z06@Mx|G)8H`~`|)2BbR}QAK`!d=T2C7hnAG9>{j&+k;Lc z$KTo+-Cdxso@Q$LoECJCDwz4&jH4WVWYLZJY{=w?m1VVWV0VK&Vu(*~tCAKz2SZoS zA$bcnF4#^@^&|rm|2-3~wsx3NG9baWrbC`pcuhdO2+!m-7QgpK>U@tf+DdmFVu4yd zSMNgGsOEDuV-FwUoB^=%#l8UyH)NO+$OZh%`@gT+Ft!ds~~D$D>0czz|m^&9*^7M=$5+Q z@mbuS7xNP=0Py?_1(5w4EUT0lNiSb)P4WeG2rsbIwllP_XL$SD{69GU7h~`*TQ34j z$#gIx`k#D!2}1;o*&c)uO`oj{)Hvp zu>D@*>vh(m5Oho)(rV{o|M)vwM>rY^yEswXqP0#mr>U!{>%>@^dYyNC2H zkW2D#Jn?m}y`J%0iF>aF@zpJ!l7b1BX_;P{9XH8mZ3Bze3*qDzj7N8R>6Bi15?1sZ z=8+FQK1F4@oIbL-X_MZ0nhJf;yUl{5?LE^(76MymYMe1Kbf(HOXxL7d! 
zffHwIJ99m2YxCdO>))7xc%iTtE&tu4NM1^!lM%HE;VFR8Ioato+MELe$)VB#3b3z| zdVzwN#q)9npQc_@Wm!;Fs1X7;0m{Vz5% zP`T-S_eJ5b*tfE#hA4^JXzRI7XyF<*-S;zS>;m_d7%$Riz}W62G3`y8$Tjf1@xe&y z^RRi_w>!iwf$-r%M+u`zMiR- zmy`ZalTqrn%C&<60ODV~AC#9TUQFh%#!{rDY5k28(?i?z8EQRsew)@iDI5%#7eBIy z?W=5%iG`9bgHK?~6;m4WS}{ilJ2B%K@`Rw{A0BjY{d|)=AhS@wnwDr$m)}EC*P7e5 zFS*&M6){vAG6^sFO{sF&l-i7)U0cQ6v;OX}W*{t5MuIpE5s<%7u1Yl}46IUZqnXzV z^q*p^K>v{i|7HuXD$Ib@8lhWJi3DFCq!cr=jZ6&Q(FvRiHm1W;{Zy`Y^q!3NJLH{Z zTe8#p!XNglG&Je;-TlH-fKG6{M$=_RuAJf6RXrB?twdxKd0Khw>BX81BkT1kMFRI{ zr7jj~wQ_lEK$ugu%q2!B@N>NHkty=9kX@9)h{numtN0zrU<*|CK#J4W5-QrN*7S+w z<1*Y>BNWF%W4*>1cTnJ0~wCKJPn0;!K~yYynF4;Rbkpex$##(0KWaTOb@wV z7|au7@#r{R*1&amLHn)&7yH-@R_C{(4Uw8nwqVu!Y+=7;oanS;-)9}EB-17!^vTDg z;}&d`MFZB^6KOH1QkRJNJ*S#Vp6X(}|(I8?@4ms!5@%fo)C}VSzjq z?FPZcWCUEc-W#*d(bhzwcXAXadBGmVY?reRnPrp;H4Z$3s05C@@5Wua?gz#norf5I zN+C?=&GQ*2U+=;{npl8bQbVLlcMWTQGVU&1VxaJT>YA35AkcU+)73lzheL zsaPbcEN*qQFyHQWS`PzrCc4&(-r=J@f+%%QE=_J~P2yl;&4^8}q0_Uu)TK-ja_3(` zHe2^eH|YcuX6)vYRcWB>^_HBdX{B&42Xl{XutR98%C5q89_|(Xx3az!*z?MdjlG0@uR%B-8>OF zpDBZWWE99}$gpx-$jm(a^!##X|0g`tM?XnNy}+{p0RTYy1D^H{pDhgSf4j*WmDMA@ zv159cPCo10e)k~KfD{Y#o+87|E_NyuXNkQtYh_dp3kzi-SHEicnIIv^%~Loxw?{%= zE^~O|zSAGH?8x>ZLa|CR7^eXNj+Dkul~Jzi=I8z7yw0Q<34D7_1{41nR*Qt^1F_jv z&_n~AI2$Bztougbz{q^T^69<#_*(-ZmsQk!<6AmAwb}+ps1zildqQxJ3HychwRD zT8SorhI$VCgD6;3lYTmLF8hqQzPvH&y_gYit=St;^`V0P0lEN|CZb?4%|1Q zpm~|yC>vmDz!6zLc|5#nAUbCs8@hlQcgifrFmqRE85!F~p1|VD0TfSKA#Uwl%8#&q z0b6uKq8qJx{=5%Ls-X%l1TM?tWu@%!kCo9~dpneGlDylyx@{VH4{GSgyH zFTc6z+BU#Y39e}0G^|QD)&&(=#4yQ?hGQ{l-SVA=7WUP|pSp@lc}E%LvZiz~BX&5V zT0nAKy5C$no<6exS0V*Y;DNRjN)SlyG;Y|7Jb~cA33(wY8fCr&ZF>Bh;q2+rO>n+W zHrUrjp?f0C7NU%6>tUGn_FX^iVTtg%N?w&G3bF6ZS5v?Qx*p40xJF(5(;XA2YeXlc zn3gwKj$63TbqjA#@x|zwlA^g+M6*qAs9|U&IS8C>j1R+BTFqPQ*NV{D2v?(lc!y;o zUex1@6D6O_H6~+6+;oiso~xYzi<`v2Z3JFu#q^6;wJz=FT5ab@k1+c!EYbWO#CgVF z?UmSfAm49W;%{gVX>{tw;Y8yYe(28R@yIm*RP4SLndOkMG)iga9Z@54hoSF_I=H_` zXKF5bH||Slemdx~&v8xVh=R5;aOb!8+Uj}PXYpaQeSpw&Os*W~*`q-{AYfs#f_>OZ z|5gG|MYoJc#iwP35ZQFsW%wz5F8d0}Uilr4U_sqe^%i5;_p#Zihb0lLjX9%?l@<=~ z>lC-$;*{i8$0CQl+PD@HeDex5SY^E0W!2(Ib_Gps&9V>JGYfSqY>^oU660E+!Ri9~ zN7c3(ar&%Kg!X)FCr!udAt3ZyPQaqgkuSy}1>b1->dCr!FcXs=BE^b(xqWYxS!0S4 zL<*%^`BYF&o>{)p@k221>#qDUA@TgVZWK!u@ zM_RHqm*%pZ)iYtl-EM6pp7Nw4Wazv6r=5 zo=*4AIQAwf&)Pd%9&QKXTb|FjugcaJUo$j&oS*MSW3PBz?Z`wB&My!UCY%xnm7kWZ z>7P)9==vbvpFzA!?PL_}1iweyb;>^;=!Mi$C)#g!EIVpnAW-RsY2MqJ7|Swa zYg=CzEUF;1mF!zQ;U#fR&aD(h*VJ%isZ}<}EzoAj1oaHeE+e?1MeHC9sA?SVigoO+ zX+!I2UD2bXz07yi42|nkouS#9s0ZeEp;$oLZ=6aU7F8o6L>^ zg(<4sgb}?N)-@9JgYv<-|BW+*JuFOw`q_O9cQTdXj<8R~tDhh`b61nJ#a0_|k^S^% zC3~%@o9FuiqX{kkQo~{M#0$0VtLrNPv&2SspMHvy^W8L=#_}x|;+W$Bn63svXAkJc zu~0b1`~|ss$~rTMVUn99wgkGx7*hHTTZ(s-%<~bWh-yNHORXuvl2?i@EEE~~*#VBU zuhU11@1-wXBElx5jq8f)lfEOsfNx!>HNu4A#dCMn_)eCKCl5S#6`fV%VX5XOaC9Y& z@Z}tF<$EH8tR4dkQG|oST)pjQGe^^cCXsA<(olzrFvf94U+gtHo}svV z)$aPlfJtVVI=wo4JC^-06f|=Y63SX13JQhMP9Cen52c4$xG;J*?@rac&pQ{-i_nUI5P~QsKGKiyAt!ab;#&bpT!&3 zCOtx++8F862brZ*8>KsfD=Op`Dp}}&h%%GxHH z75(Hh(W(o-I^ie}JeFzfJJO)6>O{$DAWt0JL1^fXia42;_RPFJ-q@la`id43BB?!y zUA1<%-mEd_ZhL}8lzLy)pP_2$WcJmlijNAqN&4I9;WYPGtzz{PB-f>~F#_JxCQ>Bi z`6(;e0}l8qyBc+%8M&e7H`RQ&SaF{G8vMtMMAN`kzo9y;Et1^CK=2CRPGds5!G) znmZriU=-;LTI7kb9kraCMrF%qBPl_KGkK<8^c_BB(_E5-#?}wp?84y$B^_u?Hz2-y zoON-=QSI4JQL4wn^?Z%BGV;zdfv{?BK^_?3ExUjiKg^~eNXPJ1Z+a#ye@!AYOb=OW zD15dX80So)goX$GN=|aRmQr#E$Nw#SUzguD9J$$yP-uRF-;E_TzFRIeKWtvY90)JM zdY&SNwpNJZ4ZJl;LbU1&MWCgYgjZ@+y{;W1dVmq$h( zs{OI%HlqCt6Vtj#HBw{N?!6#9B{o)5dzN!M7@>$hKJkq~+S5fpBbX{Z3s-!*DD$Kr4eVhEe{kZinB;G+nbi%w||ou&l&carNwnHZrrZQRAflLE^KnMkRq 
zhqkA9w{pUMR1Xp8X3NxVTCvXhF=rXjJXECnl%&326}jVe;E0h#Urh)Auj#U%S`eTr z7-K3euVobiZ>r<654yB}cieuTEWql>zAwrAn(;wcae>n)!7N^ACjlVZJNGXnRx4Pj ziQm08HwPnmn&hjbCMcD(MSi$7s$%;Ba?_?X_e%Y2ysJEbB{fjWEsD>zTSHfRk=`Gt zyWb`Gpt|cM(MScDFx;YwX^nK7GDEJi1X6#b`bgbN#ii@ zEB+&BM+%T*G^1YS-Nr#ec<*k*nFVn*}Ctb_IMxtVvAv-Q82kcw?l zGP?-toMBED?C3E5$X6)^TApL%i8_bzlan-xP0rAcczLO42}xvcVgFMUJb*0?@_vDG zD+~aD`3IEkO?2%H4HO*gOs$N64~8uiCG01ZF_&37yhcq#uyq1NC^u{eV+^>VCSZh; z96$8Ns}QO)*NemRoR`t_c*f$Gm=G}~tiqP@@}jFdfk6tXw5DC!s~Q7Ngn9T8C!alc zcvj>tyE_!+Rmi!OSRJKP>aeh2vij9D= zm1Lyfdyz@B$@K<#7Hqi#ILL*hPlcep*e?TD7*-Pt3u4K6^BJ-i|A^RfMNX_wlIM533bTU+qho zxYt2aKD*q?Jc(iq9-^r9a}`#wA*5kG)+0F?Ch*{Gsz1Y|7-l@ZIcl8k4oasL_{NIZ z;UOhDsDo0YD>Nmz3Z>0Nkif`MhKzv?jCCX*+UbIP+HfEB?RZ1aOA^Xc@yLHki*w`l zkVYVOkd)b{hHjWJnvW)CUhiZ0)%M(l4^3ACH&M6L=|fdHzVR6Y?NuQEN5SvwRcaq| z?(250uQO7w>;hy>T(|V|c@b)1-SE{)QzqyQ=Dcq4DmH}B5GnEYY{4kuCfPj~PI(G8 zclT`tnQGfj@3z-iSu`B=CyI?Sm4tcJtp&U6T&K%sNSb$H8`o6S07A|%gH>xUnHl@SlU%6> z=k*vi!A2RRXY?B0&B_wxc~H-m7k5hbm{z#QCLe3=g0wRrUnM8Ff3?E`biQuE0n+I* z8|6E9X3w-|v(5(8`G zQ+IO0+Pd$nnOr{&&?$R@$k>zV?KFJYfnUSsye9l7#40fz6Ue>LvfE2m3+<&6VPLH< zYiDg^&!BH@XZWug`u_?LFIQP8SW}{d5$Ed(%(GCRN3!P)lhA@se+dD+6V>w9%_A&} z=t$v)N4E*9!dPmT#FPtut{w7Q_XYMgN*vl4I$88CDnK75ww-2%QWemcsw`Yw8lOD# zJx`g5R$BU&Bt|YGFK$j7Cj|Q`ndz$un0d~;SKV=z89D6d$a`ICiE-RstmZ}3Zh7Xd zrUcq+SOi;>a*I$xeCT5keRF}6pj=!)impip%pRR|7POZeVZ!tiHzO5>G3}!Cy>vjW z0=u!Bt%G$jPMGG5h$egI=`F@Xy}Qes0)N5l20ico6qa^G**wWQ%t5L`Gi=`JDjn1! zxlf=70Iv!!u{W~^3gXD_5wyhjP4@<;ntC{XNS2w`me;c>HC26Na%2%~g6S@F6# zcrnuAdV1%EY30L;c4YwTGf}SU>pZg5sDZ*)XFw`@_jAquf6l>NzhaJyf3ZW-7xqB? zs~zgv*!<57U+nK+BLysFz3^M@6zUp;xP;HOB#Y=%l=YG4vMdV%>|`vJtDpIUEU+l* za3-R9Y;wWydAw$9Q?l;Q2b^33%%6>KU!^+iHD7xEqE zt`zmM(z!9Yc=2I{s;cgeqz}8@F3{vF2>Wja%bLS%ajW)Cr~Gmf5FA=Q`^0D67^1?K ziC~U?*9P4lX5X1gigGh+ zA(`ui%COT(+R@3g#%omLtP7`k?(c#I;_S)0=tN0QhoeY_}6O4pTgmYzYG6a8~IiA*JSLUqT;Ckn40}n!LQ-up9=b3 zeAa)x_odRm0zz^-p;KK=j3G{)1w1)xfO$Dx0092w=lw$E$z;F%`#+yRnlS(X literal 0 HcmV?d00001 diff --git a/source/constructs/lib/admin/glue-stack.ts b/source/constructs/lib/admin/glue-stack.ts index 953eba84..2b1502a8 100644 --- a/source/constructs/lib/admin/glue-stack.ts +++ b/source/constructs/lib/admin/glue-stack.ts @@ -56,6 +56,14 @@ export class GlueStack extends Construct { destinationKeyPrefix: 'job/script', }); + new S3Deployment.BucketDeployment(this, 'BatchCreateJdbcDatasource', { + memoryLimit: 512, + ephemeralStorageSize: Size.mebibytes(100), + sources: [S3Deployment.Source.asset('batch-create-jdbc-datasource/template')], + destinationBucket: props.bucket, + destinationKeyPrefix: 'batch-create-jdbc-datasource/template', + }); + // When upgrading, files with template as the prefix will be deleted // Therefore, the initial template file will no longer be deployed. 
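    // The 'BatchCreateJdbcDatasource' deployment above publishes the batch-create workbook to the
    // admin bucket at 'batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx',
    // the key the API exposes as const.BATCH_CREATE_TEMPLATE_PATH; download_batch_file('template')
    // returns a presigned URL for it, and batch_create() reads rows from its 'OriginTemplate'
    // sheet (const.BATCH_SHEET). A rough portal-side sketch, assuming the downloadBatchFiles
    // helper accepts a filename parameter (its signature is not shown in this patch):
    //   const res: any = await downloadBatchFiles({ filename: 'template' });
    //   window.open(res.data, '_blank'); // res.data assumed to carry the presigned URL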
// new S3Deployment.BucketDeployment(this, 'DeploymentTemplate', { From 1d4914a1aca04547b9f4bcdf5c1bc7201e4220f0 Mon Sep 17 00:00:00 2001 From: junzhong Date: Tue, 23 Jan 2024 11:27:47 +0800 Subject: [PATCH 006/112] fix(cdk): add batch create jdbc datasource template --- source/constructs/lib/admin/glue-stack.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/constructs/lib/admin/glue-stack.ts b/source/constructs/lib/admin/glue-stack.ts index 2b1502a8..3dfae1ac 100644 --- a/source/constructs/lib/admin/glue-stack.ts +++ b/source/constructs/lib/admin/glue-stack.ts @@ -58,7 +58,7 @@ export class GlueStack extends Construct { new S3Deployment.BucketDeployment(this, 'BatchCreateJdbcDatasource', { memoryLimit: 512, - ephemeralStorageSize: Size.mebibytes(100), + ephemeralStorageSize: Size.mebibytes(512), sources: [S3Deployment.Source.asset('batch-create-jdbc-datasource/template')], destinationBucket: props.bucket, destinationKeyPrefix: 'batch-create-jdbc-datasource/template', From d13dbfe3de885a5a0c0f7711ea17b972cdd2879a Mon Sep 17 00:00:00 2001 From: junzhong Date: Tue, 23 Jan 2024 11:35:33 +0800 Subject: [PATCH 007/112] fix(cdk): add batch create jdbc datasource template --- source/constructs/lib/admin/glue-stack.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/constructs/lib/admin/glue-stack.ts b/source/constructs/lib/admin/glue-stack.ts index 3dfae1ac..56e0babd 100644 --- a/source/constructs/lib/admin/glue-stack.ts +++ b/source/constructs/lib/admin/glue-stack.ts @@ -59,7 +59,7 @@ export class GlueStack extends Construct { new S3Deployment.BucketDeployment(this, 'BatchCreateJdbcDatasource', { memoryLimit: 512, ephemeralStorageSize: Size.mebibytes(512), - sources: [S3Deployment.Source.asset('batch-create-jdbc-datasource/template')], + sources: [S3Deployment.Source.asset('config/batch-create-jdbc-datasource/template')], destinationBucket: props.bucket, destinationKeyPrefix: 'batch-create-jdbc-datasource/template', }); From 3c3f2c36900e209631937f3b8bd8b608a67071ed Mon Sep 17 00:00:00 2001 From: junzhong Date: Tue, 23 Jan 2024 13:39:05 +0800 Subject: [PATCH 008/112] fix(cdk): invoke function policy --- source/constructs/lib/admin/delete-resources-stack.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/constructs/lib/admin/delete-resources-stack.ts b/source/constructs/lib/admin/delete-resources-stack.ts index d1bf3a46..7c9a4521 100644 --- a/source/constructs/lib/admin/delete-resources-stack.ts +++ b/source/constructs/lib/admin/delete-resources-stack.ts @@ -80,7 +80,7 @@ export class DeleteResourcesStack extends Construct { ], resources: [ `arn:${Aws.PARTITION}:events:*:${Aws.ACCOUNT_ID}:rule/*`, - `arn:${Aws.PARTITION}:lambda:${Aws.REGION}:${Aws.ACCOUNT_ID}:function:${SolutionInfo.SOLUTION_NAME}-RefreshAccount`, + `arn:${Aws.PARTITION}:lambda:${Aws.REGION}:${Aws.ACCOUNT_ID}:function:${SolutionInfo.SOLUTION_NAME}-Controller`, ], }); deleteAdminResourcesRole.addToPolicy(noramlStatement); From 872ce8dadd2db62c92ab9c1b6d12303d190165b3 Mon Sep 17 00:00:00 2001 From: cuihubin <530051970@qq.com> Date: Wed, 24 Jan 2024 15:38:15 +0800 Subject: [PATCH 009/112] batch operation --- source/constructs/api/common/constant.py | 5 +- .../api/common/exception_handler.py | 3 + source/constructs/api/data_source/crud.py | 2 +- source/constructs/api/data_source/main.py | 18 +- source/constructs/api/data_source/service.py | 298 ++++++++++++------ 5 files changed, 231 insertions(+), 95 deletions(-) diff --git 
a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index c3574119..c70a9c8c 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -96,8 +96,11 @@ def __setattr__(self, name, value): const.PUBLIC = 'Public' const.PRIVATE = 'Private' const.ZERO = 0 -const.BATCH_CREATE_LIMIT = 100 +const.BATCH_CREATE_LIMIT = 1000 const.BATCH_SHEET = "OriginTemplate" +const.CONNECTION_DESC_MAX_LEN = 10 +const.BATCH_CREATE_TEMPLATE_PATH = 'batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx' +const.BATCH_CREATE_REPORT_PATH = 'batch-create-jdbc-datasource/report' const.UNSTRUCTURED_FILES = { "document": ["doc", "docx", "pdf", "ppt", "pptx", "xls", "xlsx", "odp"], diff --git a/source/constructs/api/common/exception_handler.py b/source/constructs/api/common/exception_handler.py index 9902d5d7..a4fdedc7 100644 --- a/source/constructs/api/common/exception_handler.py +++ b/source/constructs/api/common/exception_handler.py @@ -46,3 +46,6 @@ def __init__(self, self.code = code self.message = message self.ref = ref + + def __msg__(self): + return self.message diff --git a/source/constructs/api/data_source/crud.py b/source/constructs/api/data_source/crud.py index 4eb220e1..c85253df 100644 --- a/source/constructs/api/data_source/crud.py +++ b/source/constructs/api/data_source/crud.py @@ -988,7 +988,7 @@ def get_enable_account_list(): def update_schema_by_account(provider_id, account_id, instance, region, schema): session = get_session() jdbc_instance_source = session.query(JDBCInstanceSource).filter(JDBCInstanceSource.account_provider_id == provider_id, - JDBCInstanceSource.region == region, + JDBCInstanceSource.region == region, JDBCInstanceSource.account_id == account_id, JDBCInstanceSource.instance_id == instance).first() if not jdbc_instance_source: diff --git a/source/constructs/api/data_source/main.py b/source/constructs/api/data_source/main.py index 3049c469..6b19d21c 100644 --- a/source/constructs/api/data_source/main.py +++ b/source/constructs/api/data_source/main.py @@ -352,7 +352,6 @@ def test_jdbc_conn(jdbc_conn_param: schemas.JDBCInstanceSourceBase): def get_data_location_list(): return service.list_data_location() - @router.get("/query-regions-by-provider", response_model=BaseResponse) @inject_session def query_regions_by_provider(provider_id: str): @@ -379,7 +378,6 @@ def list_buckets(account: schemas.AccountInfo): def query_connection_detail(account: schemas.JDBCInstanceSourceBase): return service.query_connection_detail(account) - @router.post("/jdbc-databases", response_model=BaseResponse[list[str]]) @inject_session def list_jdbc_databases(source: schemas.JdbcSource): @@ -390,7 +388,17 @@ def list_jdbc_databases(source: schemas.JdbcSource): def batch_create(files: List[UploadFile] = File(...)): return service.batch_create(files[0]) -# @router.post("/snapshop", response_model=BaseResponse) +@router.post("/query-batch-status", response_model=BaseResponse) +@inject_session +def query_batch_status(batch: str): + return service.query_batch_status(batch) + +@router.post("/download-batch-file", response_model=BaseResponse) +@inject_session +def download_batch_file(filename: str): + return service.download_batch_file(filename) + +# @router.post("/batch-sync", response_model=BaseResponse) # @inject_session -# def get_schema_by_snapshot(provider_id: int, account_id: str, instance: str, region: str): -# return service.get_schema_by_snapshot(provider_id, account_id, instance, region) +# def 
batch_sync(connection_list: []): +# pass diff --git a/source/constructs/api/data_source/service.py b/source/constructs/api/data_source/service.py index 69397ed5..ef6918dc 100644 --- a/source/constructs/api/data_source/service.py +++ b/source/constructs/api/data_source/service.py @@ -3,6 +3,7 @@ from io import BytesIO import json import os +import random import re import time import traceback @@ -11,7 +12,6 @@ import boto3 from fastapi import File, UploadFile import openpyxl -import pandas as pd import pymysql from botocore.exceptions import ClientError @@ -72,8 +72,9 @@ r'jdbc:oracle:thin://@[\w.-]+:\d+/([\w-]+)', r'jdbc:oracle:thin://@[\w.-]+:\d+:\w+', r'jdbc:sqlserver://[\w.-]+:\d+;databaseName=([\w-]+)', - r'jdbc:sqlserver://[\w.-]+:\d+;database=([\w-]+)' - ] + r'jdbc:sqlserver://[\w.-]+:\d+;database=([\w-]+)'] + +__s3_client = boto3.client('s3') def build_s3_targets(bucket, credentials, region, is_init): s3 = boto3.client('s3', @@ -402,8 +403,23 @@ def sync_jdbc_connection(jdbc: JDBCInstanceSourceBase): logger.debug(f"conn_response type is:{type(conn_response)}") logger.debug(f"conn_response is:{conn_response}") + if conn_response.get('ConnectionProperties'): + username = conn_response.get('ConnectionProperties', {}).get('USERNAME') + password = conn_response.get('ConnectionProperties', {}).get('PASSWORD') + secret = conn_response.get('ConnectionProperties', {}).get("SECRET_ID"), + url = conn_response.get('ConnectionProperties', {}).get('JDBC_CONNECTION_URL'), + jdbc_instance = JDBCInstanceSource(jdbc, jdbc_connection_url=url, master_username=username, password=password, secret=secret) + # jdbc_instance.jdbc_connection_url = url # condition_check(ec2_client, credentials, source.glue_state, conn_response['PhysicalConnectionRequirements']) - sync(glue_client, lakeformation_client, credentials, crawler_role_arn, jdbc, conn_response['ConnectionProperties']['JDBC_CONNECTION_URL'], source.jdbc_connection_schema) + sync(glue_client, + lakeformation_client, + credentials, + crawler_role_arn, + jdbc_instance, + source.jdbc_connection_schema) + else: + raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), + MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) def condition_check(ec2_client, credentials, state, connection: dict): @@ -492,14 +508,15 @@ def condition_check(ec2_client, credentials, state, connection: dict): MessageEnum.SOURCE_AVAILABILITY_ZONE_NOT_EXISTS.get_msg()) -def sync(glue, lakeformation, credentials, crawler_role_arn, jdbc: JDBCInstanceSourceBase, url: str, schemas: str): +def sync(glue, lakeformation, credentials, crawler_role_arn, jdbc: JDBCInstanceSource, schemas: str): jdbc_targets = [] _, glue_database_name, crawler_name = __gen_resources_name(jdbc) state, glue_connection_name = crud.get_jdbc_connection_glue_info(jdbc.account_provider_id, jdbc.account_id, jdbc.region, jdbc.instance_id) if state == ConnectionState.CRAWLING.value: raise BizException(MessageEnum.SOURCE_CONNECTION_CRAWLING.get_code(), MessageEnum.SOURCE_CONNECTION_CRAWLING.get_msg()) - db_names = get_db_names(url, schemas) + jdbc_source = JdbcSource(connection_url=jdbc.jdbc_connection_url, username=jdbc.master_username, password=jdbc.password, secret_id=jdbc.secret) + db_names = get_db_names_4_jdbc(jdbc_source, schemas) try: for db_name in db_names: trimmed_db_name = db_name.strip() @@ -1366,13 +1383,7 @@ def refresh_third_data_source(provider_id: int, accounts: list[str], type: str): raise BizException(MessageEnum.SOURCE_REFRESH_FAILED.get_code(), MessageEnum.SOURCE_REFRESH_FAILED.get_msg()) try: - # if type == 
DataSourceType.jdbc.value: jdbc_detector.detect(provider_id, accounts) - # elif type == DataSourceType.all.value: - # s3_detector.detect(accounts) - # rds_detector.detect(accounts) - # glue_database_detector.detect(accounts) - # jdbc_detector.detect(accounts) except Exception as e: logger.error(traceback.format_exc()) raise BizException(MessageEnum.SOURCE_CONNECTION_FAILED.get_code(), str(e)) @@ -1645,11 +1656,12 @@ def import_glue_database(glueDataBase: SourceGlueDatabaseBase): crud.import_glue_database(glueDataBase, response) def update_jdbc_conn(jdbc_conn: JDBCInstanceSource): - get_db_names(jdbc_conn.jdbc_connection_url, jdbc_conn.jdbc_connection_schema) + jdbc_source = JdbcSource(connection_url=jdbc_conn.jdbc_connection_url, username=jdbc_conn.master_username, password=jdbc_conn.password, secret_id=jdbc_conn.secret) + dbnames = get_db_names_4_jdbc(jdbc_source, jdbc_conn.jdbc_connection_schema) account_id, region = __get_admin_info(jdbc_conn) res: JDBCInstanceSourceFullInfo = crud.get_jdbc_instance_source_glue(jdbc_conn.account_provider_id, jdbc_conn.account_id, jdbc_conn.region, jdbc_conn.instance_id) check_connection(res, jdbc_conn, account_id, region) - update_connection(res, jdbc_conn, account_id, region) + update_connection(res, jdbc_conn, account_id, region, dbnames) def check_connection(res: JDBCInstanceSourceFullInfo, jdbc_instance: JDBCInstanceSource, assume_account, assume_role): if not res: @@ -1675,10 +1687,10 @@ def check_connection(res: JDBCInstanceSourceFullInfo, jdbc_instance: JDBCInstanc else: pass -def update_connection(res: JDBCInstanceSourceFullInfo, jdbc_instance: JDBCInstanceSourceUpdate, assume_account, assume_role): - # logger.info(f"source.glue_connection is: {source.glue_connection}") +def update_connection(res: JDBCInstanceSourceFullInfo, jdbc_instance: JDBCInstanceSourceUpdate, assume_account, assume_region, db_names): + jdbc_targets = __gen_jdbc_targets_from_db_names(res.glue_connection, db_names) connectionProperties_dict = gen_conn_properties(jdbc_instance) - response = __glue(account=assume_account, region=assume_role).update_connection( + __glue(account=assume_account, region=assume_region).update_connection( CatalogId=assume_account, Name=res.glue_connection, ConnectionInput={ @@ -1695,6 +1707,18 @@ def update_connection(res: JDBCInstanceSourceFullInfo, jdbc_instance: JDBCInstan } } ) + crawler_role_arn = __gen_role_arn(account_id=assume_account, + region=assume_region, + role_name='GlueDetectionJobRole') + # Update Crawler + __update_crawler(res.account_provider_id, + res.account_id, + res.instance_id, + res.region, + jdbc_targets, + res.glue_crawler, + res.glue_database, + crawler_role_arn) crud.update_jdbc_connection_full(jdbc_instance) def __validate_jdbc_url(url: str): @@ -1703,8 +1727,10 @@ def __validate_jdbc_url(url: str): return True def add_jdbc_conn(jdbcConn: JDBCInstanceSource): + print(f"create {jdbcConn.instance_id}!!!") jdbc_targets = [] - get_db_names(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema) + create_connection_response = {} + # get_db_names(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema) account_id, region = __get_admin_info(jdbcConn) crawler_role_arn = __gen_role_arn(account_id=account_id, region=region, @@ -1720,7 +1746,7 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): availability_zone = ec2_client.describe_subnets(SubnetIds=[jdbcConn.network_subnet_id])['Subnets'][0]['AvailabilityZone'] try: connectionProperties_dict = gen_conn_properties(jdbcConn) - response = __glue(account=account_id, 
region=region).create_connection( + create_connection_response = __glue(account=account_id, region=region).create_connection( CatalogId=account_id, ConnectionInput={ 'Name': glue_connection_name, @@ -1741,19 +1767,21 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): }, ) except ClientError as ce: - logger.error(traceback.format_exc()) if ce.response['Error']['Code'] == 'InvalidInputException': raise BizException(MessageEnum.SOURCE_JDBC_INPUT_INVALID.get_code(), MessageEnum.SOURCE_JDBC_INPUT_INVALID.get_msg()) - + if ce.response['Error']['Code'] == 'AlreadyExistsException': + raise BizException(MessageEnum.SOURCE_JDBC_ALREADY_EXISTS.get_code(), + MessageEnum.SOURCE_JDBC_ALREADY_EXISTS.get_msg()) except Exception as e: logger.error(traceback.format_exc()) - if response['ResponseMetadata']['HTTPStatusCode'] != 200: + if create_connection_response.get('ResponseMetadata', {}).get('HTTPStatusCode') != 200: raise BizException(MessageEnum.SOURCE_JDBC_CREATE_FAIL.get_code(), MessageEnum.SOURCE_JDBC_CREATE_FAIL.get_msg()) # Create Crawler - db_names = get_db_names(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema) + jdbc_source = JdbcSource(connection_url=jdbcConn.jdbc_connection_url, username=jdbcConn.master_username, password=jdbcConn.password, secret_id=jdbcConn.secret) + db_names = get_db_names_4_jdbc(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema, jdbc_source) for db_name in db_names: trimmed_db_name = db_name.strip() if trimmed_db_name: @@ -1762,7 +1790,7 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): 'Path': f"{trimmed_db_name}/%" }) try: - response = glue.create_crawler( + glue.create_crawler( Name=crawler_name, Role=crawler_role_arn, DatabaseName=glue_database_name, @@ -1817,9 +1845,6 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): else: raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) - except Exception as e: - raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), - MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) def gen_conn_properties(jdbcConn): connectionProperties_dict = {} @@ -1986,7 +2011,6 @@ def __create_jdbc_url(engine: str, host: str, port: str): # Add S3 bucket, SQS queues access policies def __update_access_policy_for_account(): s3_resource = boto3.session.Session().resource('s3') - # for cn_region in const.CN_REGIONS: # check if s3 bucket, sqs exists bucket_name = admin_bucket_name try: @@ -2302,7 +2326,6 @@ def __list_rds_schema(account, region, credentials, instance_name, payload, rds_ logger.info(schema_path) return schema_path - def __delete_data_source_by_account(account_id: str, region: str): try: crud.delete_s3_bucket_source_by_account(account_id=account_id, region=region) @@ -2313,16 +2336,14 @@ def __delete_data_source_by_account(account_id: str, region: str): except Exception: logger.error(traceback.format_exc()) - def __delete_account(account_id: str, region: str): try: crud.delete_account_by_region(account_id=account_id, region=region) except Exception: logger.error(traceback.format_exc()) - def query_glue_connections(account: AccountInfo): - res, list = [] + res, list = [], [] account_id, region = __get_admin_info(account) next_token = "" @@ -2340,7 +2361,7 @@ def query_glue_connections(account: AccountInfo): if not next_token: break jdbc_list = query_jdbc_connections_sub_info() - jdbc_dict = {item[0]:f"{convert_provider_id_2_name(item[1])}-{item[2]}" for item in jdbc_list} + jdbc_dict = {item[0]: f"{convert_provider_id_2_name(item[1])}-{item[2]}" for item in jdbc_list} for item in 
list: if not item['Name'].startswith(const.SOLUTION_NAME): if item['Name'] in jdbc_dict: @@ -2370,35 +2391,36 @@ def query_glue_databases(account: AdminAccountInfo): def query_account_network(account: AccountInfo): account_id, region = __get_admin_info(account) - logger.info(f'accont_id is:{account_id},region is {region}') ec2_client, __ = __ec2(account=account_id, region=region) vpcs = query_all_vpc(ec2_client) - # vpcs = [vpc['VpcId'] for vpc in query_all_vpc(ec2_client)] vpc_list = [{"vpcId": vpc.get('VpcId'), "name": gen_resource_name(vpc)} for vpc in vpcs] - # vpc_list = [{"vpcId": vpc['VpcId'], "name": gen_resource_name(vpc)} for vpc in vpcs] if account.account_provider_id != Provider.AWS_CLOUD.value: res = __query_third_account_network(vpc_list, ec2_client) - logger.info(f"query_third_account_network res is {res}") return res else: return __query_aws_account_network(vpc_list, ec2_client) +# async def add_conn_jdbc_async(jdbcConn: JDBCInstanceSource): +# key = f"{jdbcConn.account_provider_id}/{jdbcConn.account_id}/{jdbcConn.region}" +# try: +# add_jdbc_conn(jdbcConn) +# return (key, "SUCCESSED", "") +# except Exception as e: +# return (key, "FAILED", str(e)) + def __query_third_account_network(vpc_list, ec2_client: any): try: - response = ec2_client.describe_security_groups(Filters=[ {'Name': 'vpc-id', 'Values': [vpc["vpcId"] for vpc in vpc_list]}, {'Name': 'group-name', 'Values': [const.SECURITY_GROUP_JDBC]} ]) vpc_ids = [item['VpcId'] for item in response['SecurityGroups']] subnets = ec2_client.describe_subnets(Filters=[{'Name': 'vpc-id', 'Values': [vpc_ids[0]]}])['Subnets'] - # private_subnet = list(filter(lambda x: not x["MapPublicIpOnLaunch"], subnets)) selected_subnet = subnets subnets_str_from_env = os.getenv('SubnetIds', '') if subnets_str_from_env: subnets_from_env = subnets_str_from_env.split(',') selected_subnet = [item for item in subnets if item.get('SubnetId') in subnets_from_env] - # target_subnet = private_subnet[0] if private_subnet else subnets[0] target_subnets = [{'subnetId': subnet["SubnetId"], 'arn': subnet["SubnetArn"], "subnetName": gen_resource_name(subnet)} for subnet in selected_subnet] vpc_info = ec2_client.describe_vpcs(VpcIds=[vpc_ids[0]])['Vpcs'][0] return {"vpcs": [{'vpcId': vpc_info['VpcId'], @@ -2407,15 +2429,6 @@ def __query_third_account_network(vpc_list, ec2_client: any): 'securityGroups': [{'securityGroupId': response['SecurityGroups'][0]['GroupId'], 'securityGroupName': response['SecurityGroups'][0]['GroupName']}]}] } - # return {"vpcs": [{'vpcId': vpc_info['VpcId'], - # 'vpcName': [obj for obj in vpc_info['Tags'] if obj["Key"] == "Name"][0]["Value"], - # 'subnets': [{'subnetId': target_subnet['SubnetId'], - # 'arn': target_subnet['SubnetArn'], - # "subnetName": gen_resource_name(target_subnet) - # }], - # 'securityGroups': [{'securityGroupId': response['SecurityGroups'][0]['GroupId'], - # 'securityGroupName': response['SecurityGroups'][0]['GroupName']}]}] - # } except ClientError as ce: logger.error(traceback.format_exc()) if ce.response['Error']['Code'] == 'InvalidGroup.NotFound': @@ -2483,7 +2496,6 @@ def list_data_location(): res = sorted(res, key=lambda x: x.account_count, reverse=True) return res - def query_regions_by_provider(provider_id: int): return crud.query_regions_by_provider(provider_id) @@ -2511,6 +2523,25 @@ def query_full_provider_resource_infos(): def list_providers(): return crud.query_provider_list() +def get_db_names_4_jdbc(jdbc: JdbcSource, schemas: str): + if not __validate_jdbc_url(jdbc.connection_url): + raise 
BizException(MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_code(), + MessageEnum.SOURCE_JDBC_URL_FORMAT_ERROR.get_msg()) + # list schemas + db_names = set() + if jdbc.connection_url.startswith('jdbc:mysql'): + schemas = list_jdbc_databases(jdbc) + return set(schemas) + else: + schema = get_schema_from_url(jdbc.connection_url) + if schema: + db_names.add(schema) + if schemas: + db_names.update(schemas.splitlines()) + if not db_names: + raise BizException(MessageEnum.SOURCE_JDBC_JDBC_NO_DATABASE.get_code(), + MessageEnum.SOURCE_JDBC_JDBC_NO_DATABASE.get_msg()) + return db_names def get_db_names(url: str, schemas: str): if not __validate_jdbc_url(url): @@ -2528,7 +2559,6 @@ def get_db_names(url: str, schemas: str): MessageEnum.SOURCE_JDBC_JDBC_NO_DATABASE.get_msg()) return db_names - def get_schema_from_url(url): for pattern in _jdbc_url_patterns: match = re.match(pattern, url) @@ -2631,12 +2661,11 @@ def list_jdbc_databases(source: JdbcSource) -> list[str]: logger.info(databases) return databases - def batch_create(file: UploadFile = File(...)): time_str = time.time() - # batch_id=f"batch_create_jdbc_{time_str}" jdbc_from_excel_set = set() created_jdbc_list = [] + account_set = set() # Check if the file is an Excel file if not file.filename.endswith('.xlsx'): raise BizException(MessageEnum.SOURCE_BATCH_CREATE_FORMAT_ERR.get_code(), @@ -2665,52 +2694,124 @@ def batch_create(file: UploadFile = File(...)): __add_error_msg(sheet, max_column, row_index, f"The value of {header[1]} must be 0 or 1") elif not __validate_jdbc_url(str(row[3].value)): __add_error_msg(sheet, max_column, row_index, f"The value of {header[3]} must be in the format jdbc:protocol://host:port") - elif f"{row[0].value}/{row[7].value}/{row[8].value}/{row[9].value}" in jdbc_from_excel_set: + elif not str(row[3].value).startswith('jdbc:mysql') and not row[4].value: + __add_error_msg(sheet, max_column, row_index, f"MySQL-type data source {header[4]} cannot be null") + elif len(str(row[2].value)) > const.CONNECTION_DESC_MAX_LEN: + __add_error_msg(sheet, max_column, row_index, f"The value of {header[2]} must not exceed 2048") + elif f"{row[9].value}/{row[7].value}/{row[8].value}/{row[0].value}" in jdbc_from_excel_set: __add_error_msg(sheet, max_column, row_index, f"The value of {header[0]}, {header[7]}, {header[8]}, {header[9]} already exist in the preceding rows") elif f"{row[9].value}/{row[7].value}/{row[8].value}" not in accounts_list: __add_error_msg(sheet, max_column, row_index, "The account is not existed!") else: - jdbc_from_excel_set.add(f"{row[0].value}/{row[7].value}/{row[8].value}/{row[9].value}") + jdbc_from_excel_set.add(f"{row[9].value}/{row[7].value}/{row[8].value}/{row[0].value}") + account_set.add(f"{row[9].value}/{row[7].value}/{row[8].value}") created_jdbc_list.append(__gen_created_jdbc(row)) - batch_create_jdbc(created_jdbc_list) - # TODO:write into excel - # TODO:upload to S3 - for row_num, row in enumerate(sheet.iter_rows(values_only=True, min_row=3)): - print(f"{row}") - return time_str + # Query network info + if account_set: + account_info = list(account_set)[0].split("/") + network = query_account_network(AccountInfo(account_provider_id=account_info[0], account_id=account_info[1], region=account_info[2])) \ + .get('vpcs', [])[0] + vpc_id = network.get('vpcId') + subnets = [subnet.get('subnetId') for subnet in network.get('subnets')] + security_group_id = network.get('securityGroups', [])[0].get('securityGroupId') + created_jdbc_list = map_network_jdbc(created_jdbc_list, vpc_id, subnets, security_group_id) + 
batch_result = asyncio.run(batch_add_conn_jdbc(created_jdbc_list)) + result = {f"{item[0]}/{item[1]}/{item[2]}/{item[3]}": f"{item[4]}/{item[5]}" for item in batch_result} + for row_index, row in enumerate(sheet.iter_rows(min_row=3), start=2): + if row[10].value: + continue + v = result.get(f"{row[9].value}/{row[7].value}/{row[8].value}/{row[0].value}") + if v: + if v.split('/')[0]=="SUCCESSED": + __add_success_msg(sheet, max_column, row_index) + else: + __add_error_msg(sheet, max_column, row_index, v.split('/')[1]) + else: + raise BizException(MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_code(), + MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_msg()) + # Write into excel + excel_bytes = BytesIO() + workbook.save(excel_bytes) + excel_bytes.seek(0) + # Upload to S3 + batch_create_ds = f"{const.BATCH_CREATE_REPORT_PATH}/report_{time_str}.xlsx" + __s3_client.upload_fileobj(excel_bytes, admin_bucket_name, batch_create_ds) + print(f"cost:{time.time()-time_str}") + return f'report_{time_str}' + +def map_network_jdbc(created_jdbc_list: [JDBCInstanceSource], vpc_id, subnets, security_group_id): + res = [] + for item in created_jdbc_list: + item.network_sg_id = security_group_id + item.network_subnet_id = random.choice(subnets) + res.append(item) + return res +def query_batch_status(filename: str): + file_key = f"{const.BATCH_CREATE_REPORT_PATH}/{filename}.xlsx" + response = __s3_client.list_objects_v2(Bucket=admin_bucket_name, Prefix=const.BATCH_CREATE_REPORT_PATH) + for obj in response.get('Contents', []): + if obj['Key'] == file_key: + response = __s3_client.get_object(Bucket=admin_bucket_name, Key=file_key) + excel_bytes = response['Body'].read() + workbook = openpyxl.load_workbook(BytesIO(excel_bytes)) + try: + sheet = workbook[const.BATCH_SHEET] + except KeyError: + raise BizException(MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_code(), + MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_msg()) + for _, row in enumerate(sheet.iter_rows(values_only=True, min_row=3)): + if row[10] == "FAILED": + return 1 + return 2 + return 0 + +def download_batch_file(filename: str): + key = const.BATCH_CREATE_TEMPLATE_PATH if filename == "template" else f'{const.BATCH_CREATE_REPORT_PATH}/{filename}.xlsx' + url = __s3_client.generate_presigned_url( + ClientMethod="get_object", + Params={'Bucket': admin_bucket_name, 'Key': key}, + ExpiresIn=60 + ) + return url def __add_error_msg(sheet, max_column, row_index, msg): sheet.cell(row=row_index + 1, column=max_column + 1, value="FAILED") sheet.cell(row=row_index + 1, column=max_column + 2, value=msg) - # print(f"$$$$$$$$$ content is : {content}") - # df = pd.read_excel(BytesIO(content), engine='openpyxl') - # print(f"$$$$$$$$$ lines is : {df.shape[0]}") - # df = pd.read_excel(file) - # if df.shape[0] > const.BATCH_CREATE_LIMIT + 2: - # raise BizException(MessageEnum.SOURCE_BATCH_CREATE_LIMIT_ERR.get_code(), - # MessageEnum.SOURCE_BATCH_CREATE_LIMIT_ERR.get_msg()) - # print(f"$$$$$$$$${df.to_json(orient='records')}") - - # Further processing if needed - # jdbc_list = df.to_json(orient='records') - # asyncio.run(batch_create_jdbc(jdbc_list)) +def __add_success_msg(sheet, max_column, row_index): + sheet.cell(row=row_index + 1, column=max_column + 1, value="SUCCESSED") def __gen_created_jdbc(row): created_jdbc = JDBCInstanceSource() - # TODO + created_jdbc.instance_id = row[0].value + created_jdbc.jdbc_enforce_ssl = "true" if row[1].value == 1 else "false" + created_jdbc.description = str(row[2].value) + created_jdbc.jdbc_connection_url = str(row[3].value) + 
created_jdbc.jdbc_connection_schema = str(row[4].value).replace(",", "\n") if row[4].value else const.EMPTY_STR + created_jdbc.master_username = str(row[5].value) + created_jdbc.password = str(row[6].value) + created_jdbc.account_id = str(row[7].value) + created_jdbc.region = str(row[8].value) + created_jdbc.account_provider_id = row[9].value + created_jdbc.creation_time = "" + created_jdbc.custom_jdbc_cert = "" + created_jdbc.custom_jdbc_cert_string = "" + created_jdbc.jdbc_driver_class_name = "" + created_jdbc.jdbc_driver_jar_uri = "" + created_jdbc.last_updated_time = "" + created_jdbc.network_availability_zone = "" + created_jdbc.secret = "" + created_jdbc.skip_custom_jdbc_cert_validation = "false" return created_jdbc - -async def batch_create_jdbc(jdbc_list): - tasks = [add_jdbc_conn(jdbc) for jdbc in jdbc_list] - await asyncio.gather(*tasks) - +async def batch_add_conn_jdbc(created_jdbc_list): + tasks = [asyncio.create_task(__add_jdbc_conn_batch(jdbc)) for jdbc in created_jdbc_list] + return await asyncio.gather(*tasks) def get_schema_by_snapshot(provider_id: int, account_id: str, instance: str, region: str): res = crud.get_schema_by_snapshot(provider_id, account_id, instance, region) - return res[0][0].split('\n') if res else None, res[0][1] if res else None + return res[0][0].replace(',', '\n').split('\n') if res else None, res[0][1] if res else None def get_schema_by_real_time(provider_id: int, account_id: str, instance: str, region: str, db_info: bool = False): db, subnet_id = None, None @@ -2731,7 +2832,8 @@ def get_schema_by_real_time(provider_id: int, account_id: str, instance: str, re return db, subnet_id def sync_schema_by_job(provider_id: int, account_id: str, instance: str, region: str, schema: str): - jdbc_targets = [] + jdbc = JDBCInstanceSourceBase(instance_id=instance, account_provider_id=provider_id, account_id=account_id, region=region) + account_id, region = __get_admin_info(jdbc) # Query Info info = crud.get_crawler_glueDB_by_instance(provider_id, account_id, instance, region) if not info: @@ -2740,20 +2842,33 @@ def sync_schema_by_job(provider_id: int, account_id: str, instance: str, region: region=region, role_name='GlueDetectionJobRole') db_names = schema.split("\n") + jdbc_targets = __gen_jdbc_targets_from_db_names(info[0][2], db_names) + # Update Crawler + __update_crawler(provider_id, account_id, instance, region, jdbc_targets, info[0][0], info[0][1], crawler_role_arn) + # Update RDS + crud.update_schema_by_account(provider_id, account_id, instance, region, schema) + +def __gen_jdbc_targets_from_db_names(connection_name, db_names): + jdbc_targets = [] for db_name in db_names: trimmed_db_name = db_name.strip() if trimmed_db_name: jdbc_targets.append({ - 'ConnectionName': info[0][2], + 'ConnectionName': connection_name, 'Path': f"{trimmed_db_name}/%" }) - # Update Crawler - assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, account_id=account_id, instance_id=instance, region=region)) + return jdbc_targets + +def __update_crawler(provider_id, account_id, instance, region, jdbc_targets, crawler_name, glue_database, crawler_role_arn): + assume_account, assume_region = __get_admin_info(JDBCInstanceSourceBase(account_provider_id=provider_id, + account_id=account_id, + instance_id=instance, + region=region)) try: __get_glue_client(assume_account, assume_region).update_crawler( - Name=info[0], + Name=crawler_name, Role=crawler_role_arn, - DatabaseName=info[1], + DatabaseName=glue_database, Targets={ 'JdbcTargets': 
jdbc_targets, }, @@ -2766,10 +2881,17 @@ def sync_schema_by_job(provider_id: int, account_id: str, instance: str, region: logger.error(traceback.format_exc()) raise BizException(MessageEnum.BIZ_UNKNOWN_ERR.get_code(), MessageEnum.BIZ_UNKNOWN_ERR.get_msg()) - # Update RDS - crud.update_schema_by_account(provider_id, account_id, instance, region, schema) def __get_admin_info(jdbc): account_id = jdbc.account_id if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_account_id region = jdbc.region if jdbc.account_provider_id == Provider.AWS_CLOUD.value else admin_region return account_id, region + +async def __add_jdbc_conn_batch(jdbc: JDBCInstanceSource): + try: + add_jdbc_conn(jdbc) + return jdbc.account_provider_id, jdbc.account_id, jdbc.region, jdbc.instance_id, "SUCCESSED", None + except BizException as be: + return jdbc.account_provider_id, jdbc.account_id, jdbc.region, jdbc.instance_id, "FAILED", be.__msg__() + except Exception as e: + return jdbc.account_provider_id, jdbc.account_id, jdbc.region, jdbc.instance_id, "FAILED", str(e) From d54bbd5e973216a2b39538fca71e7b92815cfcb9 Mon Sep 17 00:00:00 2001 From: junzhong Date: Thu, 25 Jan 2024 09:21:23 +0800 Subject: [PATCH 010/112] fix(cdk): config --- source/constructs/api/common/constant.py | 8 ++++---- source/constructs/api/config/main.py | 6 ------ source/constructs/api/discovery_job/service.py | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index 551d0329..ebac0610 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -100,10 +100,10 @@ def __setattr__(self, name, value): const.BATCH_SHEET = "OriginTemplate" const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER = 'ConcurrentRunInstanceNumber' const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE = 50 -const.CONFIG_JOB_NUMBER_S3 = 'JobNumberS3' -const.CONFIG_JOB_NUMBER_S3_DEFAULT_VALUE = 10 -const.CONFIG_JOB_NUMBER_RDS = 'JobNumberRds' -const.CONFIG_JOB_NUMBER_RDS_DEFAULT_VALUE = 3 +const.CONFIG_SUB_JOB_NUMBER_S3 = 'SubJobNumberS3' +const.CONFIG_SUB_JOB_NUMBER_S3_DEFAULT_VALUE = 10 +const.CONFIG_SUB_JOB_NUMBER_RDS = 'SubJobNumberRds' +const.CONFIG_SUB_JOB_NUMBER_RDS_DEFAULT_VALUE = 3 const.CONTROLLER_ACTION = 'Action' const.CONTROLLER_ACTION_SCHEDULE_JOB = 'ScheduleJob' const.CONTROLLER_ACTION_CHECK_RUNNING_RUN_DATABASES = 'CheckRunningRunDatabases' diff --git a/source/constructs/api/config/main.py b/source/constructs/api/config/main.py index 148229cb..4bd83c1d 100644 --- a/source/constructs/api/config/main.py +++ b/source/constructs/api/config/main.py @@ -23,9 +23,3 @@ def set_config(configs: list[schemas.ConfigBase]): @inject_session def list_subnets(): return service.list_subnets() - - -@router.get("/run-database-ip-count", response_model=BaseResponse[int]) -@inject_session -def get_run_database_ip_count(database_type: str): - return discovery_job_service.get_run_database_ip_count(database_type) diff --git a/source/constructs/api/discovery_job/service.py b/source/constructs/api/discovery_job/service.py index 3d820ac0..38d7750e 100644 --- a/source/constructs/api/discovery_job/service.py +++ b/source/constructs/api/discovery_job/service.py @@ -219,8 +219,8 @@ def start_sample_job(job_id: int, table_name: str): def __get_job_number(database_type: str) -> int: if database_type in [DatabaseType.S3.value, DatabaseType.GLUE.value]: - return int(config_service.get_config(const.CONFIG_JOB_NUMBER_S3, 
const.CONFIG_JOB_NUMBER_S3_DEFAULT_VALUE)) - return int(config_service.get_config(const.CONFIG_JOB_NUMBER_RDS, const.CONFIG_JOB_NUMBER_RDS_DEFAULT_VALUE)) + return int(config_service.get_config(const.CONFIG_SUB_JOB_NUMBER_S3, const.CONFIG_SUB_JOB_NUMBER_S3_DEFAULT_VALUE)) + return int(config_service.get_config(const.CONFIG_SUB_JOB_NUMBER_RDS, const.CONFIG_SUB_JOB_NUMBER_RDS_DEFAULT_VALUE)) def get_run_database_ip_count(database_type: str) -> int: From e254f97ffa63e3471b5bd6f836578c2fdb8b1e3f Mon Sep 17 00:00:00 2001 From: cuihubin <530051970@qq.com> Date: Thu, 25 Jan 2024 11:19:38 +0800 Subject: [PATCH 011/112] merge conflict --- source/constructs/api/common/constant.py | 6 +++--- source/constructs/api/data_source/main.py | 8 ++++---- source/constructs/api/data_source/service.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index e6b894ec..dbe52908 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -98,11 +98,11 @@ def __setattr__(self, name, value): const.ZERO = 0 const.BATCH_CREATE_LIMIT = 1000 const.BATCH_SHEET = "OriginTemplate" -<<<<<<< HEAD + const.CONNECTION_DESC_MAX_LEN = 10 const.BATCH_CREATE_TEMPLATE_PATH = 'batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx' const.BATCH_CREATE_REPORT_PATH = 'batch-create-jdbc-datasource/report' -======= + const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER = 'ConcurrentRunInstanceNumber' const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE = 50 const.CONFIG_SUB_JOB_NUMBER_S3 = 'SubJobNumberS3' @@ -114,7 +114,7 @@ def __setattr__(self, name, value): const.CONTROLLER_ACTION_CHECK_RUNNING_RUN_DATABASES = 'CheckRunningRunDatabases' const.CONTROLLER_ACTION_CHECK_PENDING_RUN_DATABASES = 'CheckPendingRunDatabases' const.CONTROLLER_ACTION_REFRESH_ACCOUNT = 'RefreshAccount' ->>>>>>> 488eedeb27411ab78c59e3d7874eeda183fcf07d + const.UNSTRUCTURED_FILES = { "document": ["doc", "docx", "pdf", "ppt", "pptx", "xls", "xlsx", "odp"], diff --git a/source/constructs/api/data_source/main.py b/source/constructs/api/data_source/main.py index 4d538501..48e142e0 100644 --- a/source/constructs/api/data_source/main.py +++ b/source/constructs/api/data_source/main.py @@ -398,7 +398,7 @@ def query_batch_status(batch: str): def download_batch_file(filename: str): return service.download_batch_file(filename) -@router.post("/batch-sync-jdbc", response_model=BaseResponse) -@inject_session -def batch_sync_jdbc(connection_list: [schemas.JDBCInstanceSourceBase]): - return service.batch_sync_jdbc(connection_list) +# @router.post("/batch-sync-jdbc", response_model=BaseResponse) +# @inject_session +# def batch_sync_jdbc(connection_list: [schemas.JDBCInstanceSourceBase]): +# return service.batch_sync_jdbc(connection_list) diff --git a/source/constructs/api/data_source/service.py b/source/constructs/api/data_source/service.py index b077cb9e..77d61603 100644 --- a/source/constructs/api/data_source/service.py +++ b/source/constructs/api/data_source/service.py @@ -31,7 +31,7 @@ from discovery_job.service import can_delete_database as can_delete_job_database from discovery_job.service import delete_account as delete_job_by_account from discovery_job.service import delete_database as delete_job_database -from source.constructs.api.data_source.jdbc_schema import list_jdbc_databases +from .jdbc_schema import list_jdbc_databases from . 
import s3_detector, rds_detector, glue_database_detector, jdbc_detector, crud from .schemas import (AccountInfo, AdminAccountInfo, JDBCInstanceSource, JDBCInstanceSourceUpdate, JdbcSource, From b9964847733c022310c8e230e9cb4992f3933f1d Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Thu, 25 Jan 2024 16:42:50 +0800 Subject: [PATCH 012/112] fix: fix upload status --- .../src/pages/batch-operation/index.tsx | 54 +++++++++++-------- source/portal/src/ts/common.ts | 1 + 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index 25a1c3f5..87f49a79 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -20,10 +20,11 @@ import Navigation from 'pages/left-menu/Navigation'; import { RouterEnum } from 'routers/routerEnum'; import { useTranslation } from 'react-i18next'; import HelpInfo from 'common/HelpInfo'; -import { buildDocLink } from 'ts/common'; +import { BATCH_SOURCE_ID, buildDocLink } from 'ts/common'; import axios from 'axios'; import { BASE_URL } from 'tools/apiRequest'; import { downloadBatchFiles, queryBatchStatus } from 'apis/data-source/api'; +import { alertMsg } from 'tools/tools'; enum BatchOperationStatus { NotStarted = 'NotStarted', @@ -61,19 +62,19 @@ const BatchOperationContent: React.FC = ( const response: any = await queryBatchStatus({ batch: fileId, }); - const status = response.data; // 0: Inprogress, 1: Completed, 2: Error + const status = response.data; // 0: Inprogress, 1: Error, 2: Completed if (status === 1 || status === 2) { clearInterval(statusInterval); } if (status === 1) { - updateStatus(BatchOperationStatus.Completed); - } else if (status === 2) { updateStatus(BatchOperationStatus.Error); + } else if (status === 2) { + updateStatus(BatchOperationStatus.Completed); } else { updateStatus(BatchOperationStatus.Inprogress); } } catch (error) { - console.error('查询状态失败:', error); + console.error('error:', error); clearInterval(statusInterval); } }; @@ -112,12 +113,19 @@ const BatchOperationContent: React.FC = ( }, } ); - setLoadingUpload(false); - const fileId = response.data.data; - localStorage.setItem('batchFileId', fileId); - updateStatus(BatchOperationStatus.Inprogress); - statusInterval = setInterval(() => queryStatus(fileId), 5000); console.log(response.data); + setLoadingUpload(false); + if (response.data.status === 'success') { + const fileId = response.data.data; + localStorage.setItem(BATCH_SOURCE_ID, fileId); + updateStatus(BatchOperationStatus.Inprogress); + statusInterval = setInterval(() => { + queryStatus(fileId); + }, 5000); + } else { + setUploadProgress(0); + alertMsg(response.data.message ?? 
'', 'error'); + } } catch (error) { setLoadingUpload(false); console.error(error); } @@ -125,10 +133,12 @@ const BatchOperationContent: React.FC = ( }; useEffect(() => { - const fileId = localStorage.getItem('batchFileId'); + const fileId = localStorage.getItem(BATCH_SOURCE_ID); if (fileId) { queryStatus(fileId); - statusInterval = setInterval(() => queryStatus(fileId), 5000); + statusInterval = setInterval(() => { + queryStatus(fileId); + }, 5000); } return () => { clearInterval(statusInterval); }; @@ -235,13 +245,14 @@ const BatchOperation: React.FC = () => { const downloadReport = async () => { console.log('download report'); - const fileName = localStorage.getItem('batchFileId'); + const fileName = localStorage.getItem(BATCH_SOURCE_ID); if (fileName) { - const response = await downloadBatchFiles({ - filename: 'batch_1705900337.8425026', + const response: any = await downloadBatchFiles({ + // filename: 'batch_1705900337.8425026', + filename: fileName, }); console.info('response:', response); - // TODO: download file + window.open(response.data); } }; @@ -259,6 +270,9 @@ const BatchOperation: React.FC = () => { {t('button.downloadReport')} ), + onDismiss: () => { + setFlashBar([]); + }, }, ]); } @@ -276,6 +290,9 @@ const BatchOperation: React.FC = () => { {t('button.downloadReport')} ), + onDismiss: () => { + setFlashBar([]); + }, }, ]); } @@ -289,11 +306,6 @@ const BatchOperation: React.FC = () => { content: 'Creating databases. Please do not close this window. It will take less than 15 minutes.', id: 'info', - action: ( - - ), }, ]); } diff --git a/source/portal/src/ts/common.ts index 9b312d5e..f5644df4 100644 --- a/source/portal/src/ts/common.ts +++ b/source/portal/src/ts/common.ts @@ -11,6 +11,7 @@ export const EN_DOC_LINK = 'https://awslabs.github.io/sensitive-data-protection-on-aws/en'; export const SDPS_DEBUG_MODE = 'SDPS_DEBUG_MODE'; +export const BATCH_SOURCE_ID = 'SDPS_BATCH_FILE_ID'; export interface ColumnList { id: string; label: string; From 27deadfe36a6337eedcb2a07fa4055f129ef95ec Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Thu, 25 Jan 2024 16:55:42 +0800 Subject: [PATCH 013/112] fix: fix download api --- .../src/pages/batch-operation/index.tsx | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index 87f49a79..fbda60b3 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -49,7 +49,7 @@ let statusInterval: any; const BatchOperationContent: React.FC = ( props: BatchOperationContentProps ) => { - const { t } = useTranslation(); + const { t, i18n } = useTranslation(); const { updateStatus } = props; const [uploadDisabled, setUploadDisabled] = useState(false); const [files, setFiles] = useState([] as any); @@ -132,6 +132,17 @@ const BatchOperationContent: React.FC = ( } }; + const downloadReport = async () => { + console.log('download template'); + const fileName = `template_${i18n.language}`; + if (fileName) { + const response: any = await downloadBatchFiles({ + filename: fileName, + }); + window.open(response.data); + } + }; + useEffect(() => { const fileId = localStorage.getItem(BATCH_SOURCE_ID); if (fileId) { @@ -155,10 +166,17 @@ const BatchOperationContent: React.FC = ( } >

- - + {/* */} +

Date: Thu, 25 Jan 2024 16:59:26 +0800 Subject: [PATCH 014/112] fix: fix download template --- source/portal/src/pages/batch-operation/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index fbda60b3..0edce79f 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -134,7 +134,7 @@ const BatchOperationContent: React.FC = ( const downloadReport = async () => { console.log('download template'); - const fileName = `template_${i18n.language}`; + const fileName = `template-${i18n.language}`; if (fileName) { const response: any = await downloadBatchFiles({ filename: fileName, From 5c1bd145ba87f93e97f6192d3e56d724f2902384 Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Thu, 25 Jan 2024 17:54:07 +0800 Subject: [PATCH 015/112] fix: batch upload files --- .../src/pages/batch-operation/index.tsx | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/source/portal/src/pages/batch-operation/index.tsx b/source/portal/src/pages/batch-operation/index.tsx index 0edce79f..47cacf22 100644 --- a/source/portal/src/pages/batch-operation/index.tsx +++ b/source/portal/src/pages/batch-operation/index.tsx @@ -8,7 +8,6 @@ import { FlashbarProps, FormField, Header, - Icon, ProgressBar, SpaceBetween, StatusIndicator, @@ -36,6 +35,14 @@ interface BatchOperationContentProps { updateStatus: (status: BatchOperationStatus) => void; } +const startDownload = (url: string) => { + const link = document.createElement('a'); + link.href = url; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); +}; + const AddAccountHeader: React.FC = () => { const { t } = useTranslation(); return ( @@ -56,13 +63,14 @@ const BatchOperationContent: React.FC = ( const [errors, setErrors] = useState([] as any); const [uploadProgress, setUploadProgress] = useState(0); const [loadingUpload, setLoadingUpload] = useState(false); + const [loadingDownload, setLoadingDownload] = useState(false); const queryStatus = async (fileId: string) => { try { - const response: any = await queryBatchStatus({ + const status: any = await queryBatchStatus({ batch: fileId, }); - const status = response.data; // 0: Inprogress, 1: Error, 2: Completed + // 0: Inprogress, 1: Error, 2: Completed if (status === 1 || status === 2) { clearInterval(statusInterval); } @@ -134,12 +142,14 @@ const BatchOperationContent: React.FC = ( const downloadReport = async () => { console.log('download template'); + setLoadingDownload(true); const fileName = `template-${i18n.language}`; if (fileName) { - const response: any = await downloadBatchFiles({ + const url: any = await downloadBatchFiles({ filename: fileName, }); - window.open(response.data); + setLoadingDownload(false); + startDownload(url); } }; @@ -173,7 +183,7 @@ const BatchOperationContent: React.FC = ( downloadReport(); }} variant="link" - download + loading={loadingDownload} > {t('datasource:batch.step1Download')} @@ -260,17 +270,18 @@ const BatchOperation: React.FC = () => { ); const [status, setStatus] = useState(BatchOperationStatus.NotStarted); + const [loadingDownload, setLoadingDownload] = useState(false); const downloadReport = async () => { console.log('download report'); + setLoadingDownload(true); const fileName = localStorage.getItem(BATCH_SOURCE_ID); if (fileName) { - const response: any = await downloadBatchFiles({ - // filename: 'batch_1705900337.8425026', + 
const url: any = await downloadBatchFiles({ filename: fileName, }); - console.info('response:', response); - window.open(response.data); + setLoadingDownload(false); + startDownload(url); } }; @@ -284,11 +295,12 @@ const BatchOperation: React.FC = () => { content: 'Please download the report and check the result.', id: 'success', action: ( - ), onDismiss: () => { + localStorage.removeItem(BATCH_SOURCE_ID); setFlashBar([]); }, }, @@ -304,11 +316,12 @@ const BatchOperation: React.FC = () => { 'Please download the report and fix the data to upload again to retry.', id: 'error', action: ( - ), onDismiss: () => { + localStorage.removeItem(BATCH_SOURCE_ID); setFlashBar([]); }, }, From e7b34a39be9383c5dcb98e19389d08e9e43af429 Mon Sep 17 00:00:00 2001 From: junzhong Date: Fri, 26 Jan 2024 09:17:05 +0800 Subject: [PATCH 016/112] fix(be): modify ConcurrentRunJobNumber --- deployment/build-s3-dist.sh | 2 + source/constructs/api/common/abilities.py | 10 +++++ source/constructs/api/common/constant.py | 4 +- source/constructs/api/discovery_job/crud.py | 27 ++++++------ .../constructs/api/discovery_job/schemas.py | 2 + .../constructs/api/discovery_job/service.py | 44 +++++++++++-------- source/constructs/api/logging.conf | 2 +- source/constructs/api/tools/str_tool.py | 7 --- 8 files changed, 56 insertions(+), 42 deletions(-) delete mode 100644 source/constructs/api/tools/str_tool.py diff --git a/deployment/build-s3-dist.sh b/deployment/build-s3-dist.sh index 9970619a..9334942d 100755 --- a/deployment/build-s3-dist.sh +++ b/deployment/build-s3-dist.sh @@ -42,6 +42,8 @@ title "cdk synth" run cd ${SRC_PATH} # Replace before building +sed -i "s|DEBUG|INFO|"g api/logging.conf + sed -i "s|@TEMPLATE_SOLUTION_VERSION@|$SOLUTION_VERSION|"g lib/admin/database/*/*.sql sed -i "s|@TEMPLATE_SOLUTION_VERSION@|$SOLUTION_VERSION|"g lib/agent/DiscoveryJob.json diff --git a/source/constructs/api/common/abilities.py b/source/constructs/api/common/abilities.py index a74c6c36..a226bd3f 100644 --- a/source/constructs/api/common/abilities.py +++ b/source/constructs/api/common/abilities.py @@ -1,6 +1,7 @@ from common.enum import (Provider, ProviderName, DatabaseType) +from common.reference_parameter import logger, admin_account_id def convert_database_type_2_provider(database_type: str) -> int: @@ -43,6 +44,15 @@ def need_change_account_id(database_type: str) -> bool: return True return False + +def is_run_in_admin_vpc(database_type: str, account_id: str = None) -> bool: + if database_type == DatabaseType.JDBC_AWS.value: + return account_id == admin_account_id + elif database_type.startswith(DatabaseType.JDBC.value): + return True + return False + + def query_all_vpc(ec2_client): vpcs = [] response = ec2_client.describe_vpcs() diff --git a/source/constructs/api/common/constant.py b/source/constructs/api/common/constant.py index dbe52908..d544bba3 100644 --- a/source/constructs/api/common/constant.py +++ b/source/constructs/api/common/constant.py @@ -103,8 +103,8 @@ def __setattr__(self, name, value): const.BATCH_CREATE_TEMPLATE_PATH = 'batch-create-jdbc-datasource/template/batch-create-jdbc-datasource.xlsx' const.BATCH_CREATE_REPORT_PATH = 'batch-create-jdbc-datasource/report' -const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER = 'ConcurrentRunInstanceNumber' -const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE = 50 +const.CONFIG_CONCURRENT_RUN_JOB_NUMBER = 'ConcurrentRunJobNumber' +const.CONFIG_CONCURRENT_RUN_JOB_NUMBER_DEFAULT_VALUE = 50 const.CONFIG_SUB_JOB_NUMBER_S3 = 'SubJobNumberS3' 
const.CONFIG_SUB_JOB_NUMBER_S3_DEFAULT_VALUE = 10 const.CONFIG_SUB_JOB_NUMBER_RDS = 'SubJobNumberRds' diff --git a/source/constructs/api/discovery_job/crud.py b/source/constructs/api/discovery_job/crud.py index 16f90c99..b157d7bb 100644 --- a/source/constructs/api/discovery_job/crud.py +++ b/source/constructs/api/discovery_job/crud.py @@ -9,10 +9,9 @@ from sqlalchemy import func from common.constant import const import uuid -from datetime import datetime, timedelta +from datetime import datetime from catalog.crud import get_catalog_database_level_classification_by_type_all,get_catalog_database_level_classification_by_params from template.service import get_template_snapshot_no -from tools.str_tool import is_empty def get_job(id: int) -> models.DiscoveryJob: @@ -167,7 +166,18 @@ def init_run(job_id: int) -> int: if job.all_jdbc == 1: __add_job_databases(run, job.database_type, base_time_dict) for job_database in job_databases: - if is_empty(job_database.database_name) and is_empty(job_database.table_name): + if job_database.database_name: + run_database = models.DiscoveryJobRunDatabase(run_id=run.id, + account_id=job_database.account_id, + region=job_database.region, + database_type=job_database.database_type, + database_name=job_database.database_name, + table_name=job_database.table_name, + base_time=job_database.base_time, + state=RunDatabaseState.READY.value, + uuid=uuid.uuid4().hex) + run.databases.append(run_database) + else: catalog_databases = get_catalog_database_level_classification_by_params(job_database.account_id,job_database.region,job_database.database_type).all() for catalog_database in catalog_databases: base_time = base_time_dict.get( @@ -181,17 +191,6 @@ def init_run(job_id: int) -> int: state=RunDatabaseState.READY.value, uuid=uuid.uuid4().hex) run.databases.append(run_database) - else: - run_database = models.DiscoveryJobRunDatabase(run_id=run.id, - account_id=job_database.account_id, - region=job_database.region, - database_type=job_database.database_type, - database_name=job_database.database_name, - table_name=job_database.table_name, - base_time=job_database.base_time, - state=RunDatabaseState.READY.value, - uuid=uuid.uuid4().hex) - run.databases.append(run_database) session.add(run) session.commit() return run.id diff --git a/source/constructs/api/discovery_job/schemas.py b/source/constructs/api/discovery_job/schemas.py index 8a24d7d2..f5771194 100644 --- a/source/constructs/api/discovery_job/schemas.py +++ b/source/constructs/api/discovery_job/schemas.py @@ -42,6 +42,7 @@ class DiscoveryJobRunDatabaseStatus(BaseModel): success_count: int fail_count: int ready_count: int + pending_count: int running_count: int stopped_count: int not_existed_count: int @@ -49,6 +50,7 @@ class DiscoveryJobRunDatabaseStatus(BaseModel): success_per: int fail_per: int ready_per: int + pending_per: int running_per: int stopped_per: int not_existed_per: int diff --git a/source/constructs/api/discovery_job/service.py b/source/constructs/api/discovery_job/service.py index 38d7750e..066df698 100644 --- a/source/constructs/api/discovery_job/service.py +++ b/source/constructs/api/discovery_job/service.py @@ -14,8 +14,7 @@ from openpyxl import Workbook from tempfile import NamedTemporaryFile from catalog.service import sync_job_detection_result -from tools.str_tool import is_empty -from common.abilities import need_change_account_id, convert_database_type_2_provider +from common.abilities import need_change_account_id, convert_database_type_2_provider, is_run_in_admin_vpc import config.service 
as config_service from data_source import jdbc_schema from tools import list_tool @@ -48,10 +47,7 @@ def create_job(job: schemas.DiscoveryJobCreate): if job.depth_structured is None: job.depth_structured = 0 if job.depth_unstructured is None: - if job.database_type == DatabaseType.S3.value: - job.depth_unstructured = -1 # -1 represents all - else: - job.depth_unstructured = 0 + job.depth_unstructured = 0 db_job = crud.create_job(job) if db_job.schedule != const.ON_DEMAND: create_event(db_job.id, db_job.schedule) @@ -259,10 +255,11 @@ def __start_run_databases(run_databases): account_loop_wait = {} for run_database in run_databases: + if is_run_in_admin_vpc(run_database.database_type, run_database.account_id): + limit_concurrency = True account_id = run_database.account_id if need_change_account_id(run_database.database_type): account_id = admin_account_id - limit_concurrency = True if account_id in account_loop_wait: tmp = account_loop_wait[account_id] tmp = tmp + const.JOB_INTERVAL_WAIT @@ -271,7 +268,8 @@ def __start_run_databases(run_databases): account_loop_wait[account_id] = const.JOB_INTERVAL_WAIT if limit_concurrency: - concurrent_run_instance_number = config_service.get_config(const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER, const.CONFIG_CONCURRENT_RUN_INSTANCE_NUMBER_DEFAULT_VALUE) + concurrent_run_job_number = int(config_service.get_config(const.CONFIG_CONCURRENT_RUN_JOB_NUMBER, const.CONFIG_CONCURRENT_RUN_JOB_NUMBER_DEFAULT_VALUE)) + logger.debug(f"concurrent_run_job_number:{concurrent_run_job_number}") count_run_database = __count_run_database_by_subnet() job_placeholder = "," @@ -279,13 +277,18 @@ def __start_run_databases(run_databases): failed_run_database_count = 0 for run_database in run_databases: try: - if limit_concurrency: + if is_run_in_admin_vpc(run_database.database_type, run_database.account_id): + logger.debug(f"database_name:{run_database.database_name}") provider_id = convert_database_type_2_provider(run_database.database_type) database_schemas_real_time, subnet_id = jdbc_schema.get_schema_by_real_time(provider_id, run_database.account_id, run_database.region, run_database.database_name, True) count = count_run_database.get(subnet_id, 0) - if count >= concurrent_run_instance_number: + logger.debug(f"subnet_id:{subnet_id}") + logger.debug(f"count:{count}") + if count >= concurrent_run_job_number: run_database.state = RunDatabaseState.PENDING.value + logger.debug(f"{run_database.database_name} break") continue + logger.debug(f"run_database.database_name add") count_run_database[subnet_id] = count + 1 if database_schemas_real_time: database_schemas_snapshot, _ = jdbc_schema.get_schema_by_snapshot(provider_id, run_database.account_id, run_database.region, run_database.database_name) @@ -322,7 +325,7 @@ def __start_run_databases(run_databases): if job.range == 1 and run_database.base_time: base_time = mytime.format_time(run_database.base_time) need_run_crawler = True - if run_database.database_type == DatabaseType.GLUE.value or not is_empty(run_database.table_name): + if run_database.database_type == DatabaseType.GLUE.value or run_database.table_name: need_run_crawler = False crawler_name = f"{const.SOLUTION_NAME}-{run_database.database_type}-{run_database.database_name}" glue_database_name = f"{const.SOLUTION_NAME}-{run_database.database_type}-{run_database.database_name}" @@ -349,15 +352,15 @@ def __start_run_databases(run_databases): "DatabaseName": run_database.database_name, "GlueDatabaseName": glue_database_name, "UnstructuredDatabaseName": 
f"{const.SOLUTION_NAME}-{DatabaseType.S3_UNSTRUCTURED.value}-{run_database.database_name}", - "TableName": job_placeholder if is_empty(run_database.table_name) else run_database.table_name, + "TableName": run_database.table_name if run_database.table_name else job_placeholder, "TemplateId": str(run.template_id), "TemplateSnapshotNo": str(run.template_snapshot_no), "DepthStructured": "0" if run.depth_structured is None else str(run.depth_structured), "DepthUnstructured": "0" if run.depth_unstructured is None else str(run.depth_unstructured), - "ExcludeKeywords": job_placeholder if is_empty(run.exclude_keywords) else run.exclude_keywords, - "IncludeKeywords": job_placeholder if is_empty(run.include_keywords) else run.include_keywords, - "ExcludeFileExtensions": job_placeholder if is_empty(run.exclude_file_extensions) else run.exclude_file_extensions, - "IncludeFileExtensions": job_placeholder if is_empty(run.include_file_extensions) else run.include_file_extensions, + "ExcludeKeywords": run.exclude_keywords if run.exclude_keywords else job_placeholder, + "IncludeKeywords": run.include_keywords if run.include_keywords else job_placeholder, + "ExcludeFileExtensions": run.exclude_file_extensions if run.exclude_file_extensions else job_placeholder, + "IncludeFileExtensions": run.include_file_extensions if run.include_file_extensions else job_placeholder, "BaseTime": base_time, # "JobBookmarkOption": job_bookmark_option, "DetectionThreshold": str(job.detection_threshold), @@ -615,8 +618,8 @@ def list_run_databases_pagination(run_id: int, condition: QueryCondition): def get_run_status(job_id: int, run_id: int) -> schemas.DiscoveryJobRunDatabaseStatus: run_list = crud.list_run_databases(run_id) - total_count = success_count = fail_count = ready_count = running_count = stopped_count = not_existed_count = 0 - success_per = fail_per = ready_per = running_per = stopped_per = not_existed_per = 0 + success_count = fail_count = ready_count = pending_count = running_count = stopped_count = not_existed_count = 0 + success_per = fail_per = ready_per = pending_per = running_per = stopped_per = not_existed_per = 0 total_count = len(run_list) if total_count > 0: @@ -627,6 +630,8 @@ def get_run_status(job_id: int, run_id: int) -> schemas.DiscoveryJobRunDatabaseS fail_count += 1 elif run_item.state == RunDatabaseState.READY.value: ready_count += 1 + elif run_item.state == RunDatabaseState.PENDING.value: + pending_count += 1 elif run_item.state == RunDatabaseState.RUNNING.value: running_count += 1 elif run_item.state == RunDatabaseState.STOPPED.value: @@ -636,6 +641,7 @@ def get_run_status(job_id: int, run_id: int) -> schemas.DiscoveryJobRunDatabaseS fail_per = int(fail_count / total_count * 100) ready_per = int(ready_count / total_count * 100) + pending_per = int(pending_count / total_count * 100) running_per = int(running_count / total_count * 100) stopped_per = int(stopped_count / total_count * 100) not_existed_per = int(not_existed_count / total_count * 100) @@ -646,6 +652,7 @@ def get_run_status(job_id: int, run_id: int) -> schemas.DiscoveryJobRunDatabaseS success_count=success_count, fail_count=fail_count, ready_count=ready_count, + pending_count=pending_count, running_count=running_count, stopped_count=stopped_count, not_existed_count=not_existed_count, @@ -653,6 +660,7 @@ def get_run_status(job_id: int, run_id: int) -> schemas.DiscoveryJobRunDatabaseS success_per=success_per, fail_per=fail_per, ready_per=ready_per, + pending_per=pending_per, running_per=running_per, stopped_per=stopped_per, 
not_existed_per=not_existed_per diff --git a/source/constructs/api/logging.conf b/source/constructs/api/logging.conf index 250b8107..2f859a0d 100644 --- a/source/constructs/api/logging.conf +++ b/source/constructs/api/logging.conf @@ -12,7 +12,7 @@ level=INFO handlers= [logger_api] -level=INFO +level=DEBUG handlers=consoleHandler qualname=api diff --git a/source/constructs/api/tools/str_tool.py b/source/constructs/api/tools/str_tool.py deleted file mode 100644 index b42bcc3d..00000000 --- a/source/constructs/api/tools/str_tool.py +++ /dev/null @@ -1,7 +0,0 @@ -from common.constant import const - - -def is_empty(in_str: str) -> bool: - if in_str is None or in_str == const.EMPTY_STR: - return True - return False From d7c0cb3146080ccaa561e08275aa00f79fcbc8af Mon Sep 17 00:00:00 2001 From: cuihubin <530051970@qq.com> Date: Fri, 26 Jan 2024 11:07:28 +0800 Subject: [PATCH 017/112] update jdbc batch create --- source/constructs/api/data_source/service.py | 21 +++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/source/constructs/api/data_source/service.py b/source/constructs/api/data_source/service.py index 77d61603..ce9a7823 100644 --- a/source/constructs/api/data_source/service.py +++ b/source/constructs/api/data_source/service.py @@ -1775,7 +1775,7 @@ def add_jdbc_conn(jdbcConn: JDBCInstanceSource): # Create Crawler jdbc_source = JdbcSource(connection_url=jdbcConn.jdbc_connection_url, username=jdbcConn.master_username, password=jdbcConn.password, secret_id=jdbcConn.secret) - db_names = get_db_names_4_jdbc(jdbcConn.jdbc_connection_url, jdbcConn.jdbc_connection_schema, jdbc_source) + db_names = get_db_names_4_jdbc(jdbc_source, jdbcConn.jdbc_connection_schema) for db_name in db_names: trimmed_db_name = db_name.strip() if trimmed_db_name: @@ -2667,8 +2667,13 @@ def batch_create(file: UploadFile = File(...)): # Read the Excel file content = file.file.read() workbook = openpyxl.load_workbook(BytesIO(content), read_only=False) + print(f"******** worksheet is: {workbook}") + print(f"******** worksheet type is: {type(workbook)}") + sheet_names = workbook.sheetnames + print(f"******** sheet_names is: {sheet_names[0]}") + print(f"******** BATCH_SHEET is: {const.BATCH_SHEET}") try: - sheet = workbook[const.BATCH_SHEET] + sheet = workbook.get_sheet_by_name(const.BATCH_SHEET) except KeyError: raise BizException(MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_code(), MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_msg()) @@ -2682,7 +2687,7 @@ def batch_create(file: UploadFile = File(...)): for row_index, row in enumerate(sheet.iter_rows(min_row=3), start=2): if all(cell.value is None for cell in row): continue - if any(not cell.value for cell in [row[0], row[1], row[3], row[5], row[6], row[7], row[8], row[9]]): + if any(cell.value is None or str(cell.value).strip() == const.EMPTY_STR for cell in [row[0], row[1], row[3], row[5], row[6], row[7], row[8], row[9]]): __add_error_msg(sheet, max_column, row_index, "Fields cannot be empty") elif sheet.cell(row=row_index + 1, column=2).value not in [0, 1]: __add_error_msg(sheet, max_column, row_index, f"The value of {header[1]} must be 0 or 1") @@ -2716,13 +2721,10 @@ def batch_create(file: UploadFile = File(...)): continue v = result.get(f"{row[9].value}/{row[7].value}/{row[8].value}/{row[0].value}") if v: - if v.split('/')[0]=="SUCCESSED": + if v.split('/')[0] == "SUCCESSED": __add_success_msg(sheet, max_column, row_index) else: __add_error_msg(sheet, max_column, row_index, v.split('/')[1]) - else: - raise
BizException(MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_code(), - MessageEnum.SOURCE_BATCH_SHEET_NOT_FOUND.get_msg()) # Write into excel excel_bytes = BytesIO() workbook.save(excel_bytes) @@ -2749,6 +2751,7 @@ def query_batch_status(filename: str): response = __s3_client.get_object(Bucket=admin_bucket_name, Key=file_key) excel_bytes = response['Body'].read() workbook = openpyxl.load_workbook(BytesIO(excel_bytes)) + try: sheet = workbook[const.BATCH_SHEET] except KeyError: @@ -2758,10 +2761,10 @@ def query_batch_status(filename: str): if row[10] == "FAILED": return 1 return 2 - return 0 + return 0 def download_batch_file(filename: str): - key = const.BATCH_CREATE_TEMPLATE_PATH if filename == "template" else f'{const.BATCH_CREATE_REPORT_PATH}/{filename}.xlsx' + key = const.BATCH_CREATE_TEMPLATE_PATH if filename.startswith("template") else f'{const.BATCH_CREATE_REPORT_PATH}/{filename}.xlsx' url = __s3_client.generate_presigned_url( ClientMethod="get_object", Params={'Bucket': admin_bucket_name, 'Key': key}, From 4dcf46d29c189dbea7f412f3d19a846a012092c5 Mon Sep 17 00:00:00 2001 From: cuihubin <530051970@qq.com> Date: Fri, 26 Jan 2024 12:56:55 +0800 Subject: [PATCH 018/112] add list resource fun --- source/constructs/api/data_source/crud.py | 14 ++++++++++++++ source/constructs/api/data_source/resource_list.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 source/constructs/api/data_source/resource_list.py diff --git a/source/constructs/api/data_source/crud.py b/source/constructs/api/data_source/crud.py index ffbca27d..6bdf965a 100644 --- a/source/constructs/api/data_source/crud.py +++ b/source/constructs/api/data_source/crud.py @@ -997,3 +997,17 @@ def update_schema_by_account(provider_id, account_id, instance, region, schema): if jdbc_instance_source: jdbc_instance_source.jdbc_connection_schema = schema session.commit() + +def list_s3_resources(): + return get_session().query(S3BucketSource.account_id, S3BucketSource.region, S3BucketSource.bucket_name).all() + +def list_rds_resources(): + return get_session().query(RdsInstanceSource.account_id, RdsInstanceSource.region, RdsInstanceSource.instance_id).all() + +def list_glue_resources(): + return get_session().query(SourceGlueDatabase.account_id, SourceGlueDatabase.region, SourceGlueDatabase.glue_database_name).all() + +def list_jdbc_resources_by_provider(provider_id: int): + return get_session() \ + .query(JDBCInstanceSource.account_provider_id, JDBCInstanceSource.account_id, JDBCInstanceSource.region, JDBCInstanceSource.instance_id) \ + .filter(JDBCInstanceSource.account_provider_id == provider_id).all() diff --git a/source/constructs/api/data_source/resource_list.py b/source/constructs/api/data_source/resource_list.py new file mode 100644 index 00000000..6dea0080 --- /dev/null +++ b/source/constructs/api/data_source/resource_list.py @@ -0,0 +1,14 @@ +from common.enum import DatabaseType +from common.abilities import convert_database_type_2_provider +from . 
import crud + + +def list_resources_by_database_type(data_base_type: str): + if data_base_type == DatabaseType.S3.value: + return crud.list_s3_resources() + elif data_base_type == DatabaseType.RDS.value: + return crud.list_rds_resources() + elif data_base_type == DatabaseType.GLUE.value: + return crud.list_glue_resources() + else: + return crud.list_jdbc_resources_by_provider(convert_database_type_2_provider(data_base_type)) From 35ee099546bddd5c341ba1438622972aff6ec540 Mon Sep 17 00:00:00 2001 From: junzhong Date: Fri, 26 Jan 2024 15:26:08 +0800 Subject: [PATCH 019/112] fix(be): Compatibility job failed --- .../constructs/api/discovery_job/service.py | 29 +++++++++++-------- source/constructs/api/lambda/controller.py | 12 +++++--- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/source/constructs/api/discovery_job/service.py b/source/constructs/api/discovery_job/service.py index 066df698..fb86b574 100644 --- a/source/constructs/api/discovery_job/service.py +++ b/source/constructs/api/discovery_job/service.py @@ -863,10 +863,10 @@ def check_running_run_databases(): if (datetime.datetime.now(pytz.timezone('UTC')) - stop_time).seconds < const.LAMBDA_MAX_RUNTIME: logger.info(f"run id:{run_database.run_id},run database id:{run_database.id} continue") continue - state = RunDatabaseState.SUCCEEDED.value + state, message = __get_run_log(run_database, False) elif run_database_state == RunDatabaseState.FAILED.value.upper(): state = RunDatabaseState.FAILED.value - message = __get_run_error_log(run_database) + _, message = __get_run_log(run_database) elif run_database_state == RunDatabaseState.ABORTED.value.upper(): state = RunDatabaseState.STOPPED.value @@ -906,7 +906,7 @@ def __get_run_database_state_from_agent(run_database: models.DiscoveryJobRunData return RunDatabaseState.NOT_EXIST.value, None -def __get_run_error_log(run_database: models.DiscoveryJobRunDatabase) -> str: +def __get_run_log(run_database: models.DiscoveryJobRunDatabase, error_log=True) -> (str, str): account_id = run_database.account_id region = run_database.region if need_change_account_id(run_database.database_type): @@ -925,24 +925,29 @@ def __get_run_error_log(run_database: models.DiscoveryJobRunDatabase) -> str: aws_session_token=credentials['SessionToken'], region_name=region, ) + max_results = 1 if error_log else 5 try: response = client_sfn.get_execution_history( executionArn=f'arn:{partition}:states:{region}:{account_id}:execution:{const.SOLUTION_NAME}-DiscoveryJob:{const.SOLUTION_NAME}-{run_database.run_id}-{run_database.id}-{run_database.uuid}', reverseOrder=True, - maxResults=1, + maxResults=max_results, ) except client_sfn.exceptions.ExecutionDoesNotExist as e: - return RunDatabaseState.NOT_EXIST.value - if response["events"][0]["type"] == "ExecutionFailed": - return response["events"][0]["executionFailedEventDetails"]["cause"] - return "" + return RunDatabaseState.NOT_EXIST.value, "" + if error_log: + if response["events"][0]["type"] == "ExecutionFailed": + return RunDatabaseState.FAILED.value, response["events"][0]["executionFailedEventDetails"]["cause"] + return RunDatabaseState.FAILED.value, "" + else: + parameters = json.loads(response["events"][4]["taskScheduledEventDetails"]["parameters"]) + result = parameters.get("MessageBody", {}).get("Result") + if result: + return result.get("State"), result.get("Message", "") + return RunDatabaseState.SUCCEEDED.value, "" def __get_cell_value(cell: dict): - if "VarCharValue" in cell: - return cell["VarCharValue"] - else: - return "" + return 
cell.get("VarCharValue", "") def get_report_url(run_id: int): diff --git a/source/constructs/api/lambda/controller.py b/source/constructs/api/lambda/controller.py index 590a068c..fdde8b30 100644 --- a/source/constructs/api/lambda/controller.py +++ b/source/constructs/api/lambda/controller.py @@ -2,12 +2,13 @@ import discovery_job.service as discovery_job_service import data_source.service as data_source_service from db.database import gen_session, close_session -import logging -from common.reference_parameter import logger +import logging.config from common.constant import const from . import auto_sync_data, sync_crawler_results +import re -logger.setLevel(logging.INFO) +logging.config.fileConfig('logging.conf', disable_existing_loggers=False) +logger = logging.getLogger(const.LOGGER_API) def lambda_handler(event, context): @@ -45,10 +46,13 @@ def __schedule_job(event): def __deal_queue(event): event_source = event['Records'][0]["eventSourceARN"].split(":")[-1] + logger.info(f"event_source:{event_source}") for record in event['Records']: payload = record["body"] logger.info(payload) - payload = payload.replace("\'", "\"") + updated_string = re.sub(r'("[^"]*?)(\')(.*?)(\')([^"]*?")', r'\1 \3 \5', str(payload)) + payload = updated_string.replace("\'", "\"") + logger.debug(payload) current_event = json.loads(payload) if event_source == f"{const.SOLUTION_NAME}-DiscoveryJob": discovery_job_service.complete_run_database(current_event) From d5c29431821880db8f25a7f9c32ecd5dedf0e940 Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Fri, 26 Jan 2024 15:30:28 +0800 Subject: [PATCH 020/112] chore: add system settings api --- source/portal/public/locales/en/common.json | 7 +- source/portal/public/locales/zh/common.json | 8 +- source/portal/src/apis/config/api.ts | 18 ++ .../src/pages/system-settings/index.tsx | 278 +++++++++++++----- .../pages/system-settings/typs/config-typs.ts | 10 + 5 files changed, 244 insertions(+), 77 deletions(-) create mode 100644 source/portal/src/apis/config/api.ts create mode 100644 source/portal/src/pages/system-settings/typs/config-typs.ts diff --git a/source/portal/public/locales/en/common.json b/source/portal/public/locales/en/common.json index b5d0ac16..087fe968 100644 --- a/source/portal/public/locales/en/common.json +++ b/source/portal/public/locales/en/common.json @@ -441,7 +441,10 @@ "subnet": "Subnet ", "subnetNameDesc": "Total number of left IPs in subnet", "currentIPLeft": "Current IP left", - "subnetDesc": "IP usage per subnet = (Concurrent number of RDS Instance * (3 + Concurrent number of sub-job runs)) / Count of subnets", - "estimateResult": "Based on the above settings, for each job run it will consume {{ipCount}} IPs maximum per subnet." + "subnetDesc": "IP usage per subnet = 3 + (Number of sub-job runs can be used for 1 RDS scan * 2)", + "estimateResult": "Based on the above settings, for each job run it will consume {{ipCount}} IPs maximum per subnet.", + "estimateError": "The IP in discovery job can not be more than the IP left of subnets. Please adjust the settings.", + "estimateSuccess": "Config validate successfully.", + "estimateFirst": "Please click validate button to validate the settings." 
} } diff --git a/source/portal/public/locales/zh/common.json b/source/portal/public/locales/zh/common.json index eca3ad2b..211a7018 100644 --- a/source/portal/public/locales/zh/common.json +++ b/source/portal/public/locales/zh/common.json @@ -440,8 +440,10 @@ "rdsSubJobRunNumberDesc": "1 次 RDS 扫描可运行多少次 Glue 作业", "subnet": "子网 ", "subnetNameDesc": "子网中剩余IP总数", - "currentIPLeft": "当前剩余IP", - "subnetDesc": "每个子网的 IP 使用量 = (RDS 实例并发数量 * (3 + 子作业并发运行数量)) / 子网数量", - "estimateResult": "根据上述设置,对于每个作业运行,每个子网最多将消耗 {{ipCount}} 个 IP。" + "subnetDesc": "每个子网的 IP 使用量 = 3 + (可用于 1 次 RDS 扫描的子作业运行数量 * 2)", + "estimateResult": "根据上述设置,对于每个作业运行,每个子网最多将消耗 {{ipCount}} 个 IP。", + "estimateError": "发现作业中的 IP 不能超过子网剩余的 IP。请调整设置。", + "estimateSuccess": "配置验证成功。", + "estimateFirst": "请单击验证按钮以验证设置。" } } diff --git a/source/portal/src/apis/config/api.ts b/source/portal/src/apis/config/api.ts new file mode 100644 index 00000000..70a74985 --- /dev/null +++ b/source/portal/src/apis/config/api.ts @@ -0,0 +1,18 @@ +import { apiRequest } from 'tools/apiRequest'; + +const getSystemConfig = async (params: any) => { + const result = await apiRequest('get', 'config', params); + return result; +}; + +const getSubnetsRunIps = async (params: any) => { + const result = await apiRequest('get', 'config/subnets', params); + return result; +}; + +const updateSystemConfig = async (params: any) => { + const result = await apiRequest('post', 'config', params); + return result; +}; + +export { getSystemConfig, getSubnetsRunIps, updateSystemConfig }; diff --git a/source/portal/src/pages/system-settings/index.tsx b/source/portal/src/pages/system-settings/index.tsx index b6a10344..6d58e418 100644 --- a/source/portal/src/pages/system-settings/index.tsx +++ b/source/portal/src/pages/system-settings/index.tsx @@ -1,4 +1,5 @@ import { + Alert, AppLayout, Button, Container, @@ -8,19 +9,27 @@ import { Header, Input, SpaceBetween, + Spinner, } from '@cloudscape-design/components'; -import React from 'react'; +import React, { useEffect, useState } from 'react'; import CustomBreadCrumb from 'pages/left-menu/CustomBreadCrumb'; import Navigation from 'pages/left-menu/Navigation'; import { RouterEnum } from 'routers/routerEnum'; import { useTranslation } from 'react-i18next'; import { useNavigate } from 'react-router-dom'; +import { + getSubnetsRunIps, + getSystemConfig, + updateSystemConfig, +} from 'apis/config/api'; +import { ConfigItem, ConfigSubnet } from './typs/config-typs'; +import { alertMsg } from 'tools/tools'; const SettingsHeader: React.FC = () => { const { t } = useTranslation(); return (
- {t('settings.title')} + {t('nav.systemSettings')}
); }; @@ -29,77 +38,202 @@ const SystemSettingContent = () => { const navigate = useNavigate(); const { t } = useTranslation(); + const [loadingConfig, setLoadingConfig] = useState(true); + const [loadingUpdate, setLoadingUpdate] = useState(false); + const [concurrentRunJobNumber, setConcurrentRunJobNumber] = useState(''); + const [subJobNumberRDS, setSubJobNumberRDS] = useState(''); + const [subnetItems, setSubnetItems] = useState([]); + + const [ipError, setIpError] = useState(false); + const [ipSuccess, setIpSuccess] = useState(false); + + const getConfigValue = (configs: ConfigItem[], key: string) => { + const config = configs.find((config) => config.config_key === key); + return config ? config.config_value : ''; + }; + + const getSystemSettingsConfig = async () => { + setLoadingConfig(true); + const configList: any = await getSystemConfig({}); + setConcurrentRunJobNumber( + getConfigValue(configList, 'ConcurrentRunJobNumber') + ); + setSubJobNumberRDS(getConfigValue(configList, 'SubJobNumberRds')); + const ipList: any = await getSubnetsRunIps({}); + setSubnetItems(ipList); + setLoadingConfig(false); + }; + + const updateSystemSettingsConfig = async () => { + if (ipError) { + return; + } + if (!ipSuccess) { + alertMsg(t('settings.estimateFirst'), 'error'); + return; + } + setLoadingUpdate(true); + await updateSystemConfig([ + { + config_key: 'ConcurrentRunJobNumber', + config_value: concurrentRunJobNumber, + }, + { + config_key: 'SubJobNumberRds', + config_value: subJobNumberRDS, + }, + ]); + alertMsg(t('updateSuccess'), 'success'); + setIpError(false); + setIpSuccess(false); + setLoadingUpdate(false); + }; + + const findMinimumAvailableIP = (configs: ConfigSubnet[]) => { + const minAvailableIP = configs.reduce((min, current) => { + return min.available_ip_address_count < current.available_ip_address_count + ? min + : current; + }, configs[0]); + return minAvailableIP.available_ip_address_count; + }; + + const calculateEstimateRestIPs = () => { + return 3 + parseInt(subJobNumberRDS) * 2; + }; + + const estimateIPs = () => { + if (calculateEstimateRestIPs() > findMinimumAvailableIP(subnetItems) - 50) { + setIpSuccess(false); + setIpError(true); + } else { + setIpSuccess(true); + setIpError(false); + } + }; + + useEffect(() => { + getSystemSettingsConfig(); + }, []); + return ( -
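For reference, a minimal usage sketch of the config API introduced in this patch. It is illustrative only and not part of any commit; the helper name reviewRdsJobIpBudget is an assumption, while the SubJobNumberRds key, the 50-IP buffer, and the formula 3 + (sub-job runs per RDS scan * 2) come from the settings page code and the settings.subnetDesc locale string above.

// sketch only: load current settings, estimate per-subnet IP usage, then persist the value back
import { getSystemConfig, getSubnetsRunIps, updateSystemConfig } from 'apis/config/api';

interface ConfigItem { config_key: string; config_value: string; }
interface ConfigSubnet { subnet_id: string; name: string; available_ip_address_count: number; }

export const reviewRdsJobIpBudget = async (): Promise<number> => {
  const configs = (await getSystemConfig({})) as unknown as ConfigItem[];
  const subnets = (await getSubnetsRunIps({})) as unknown as ConfigSubnet[];
  const subJobNumberRds =
    configs.find((c) => c.config_key === 'SubJobNumberRds')?.config_value ?? '3';
  // IP usage per subnet = 3 + (sub-job runs usable by one RDS scan * 2)
  const ipPerSubnet = 3 + parseInt(subJobNumberRds, 10) * 2;
  const minFreeIp = Math.min(...subnets.map((s) => s.available_ip_address_count));
  // same guard as the settings page: keep a 50-IP buffer in the smallest subnet
  if (ipPerSubnet > minFreeIp - 50) {
    throw new Error('Estimated IP usage exceeds the IPs left in the smallest subnet.');
  }
  await updateSystemConfig([
    { config_key: 'SubJobNumberRds', config_value: subJobNumberRds },
  ]);
  return ipPerSubnet;
};

The same calculation backs the estimateResult message shown on the settings page.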
- - - - } - > - {t('settings.rdsDataSourceDiscovery')} - } - > - - + {loadingConfig ? ( + + + + + + ) : ( +
+ + + + } + > + + {t('settings.rdsDataSourceDiscovery')} + + } > - - + + + { + setConcurrentRunJobNumber(e.detail.value); + }} + /> + - - - - - -
-
- - - -
-
- - - -
-
-
- - -
{t('settings.subnetDesc')}
-
- -
- {t('settings.estimateResult', { - ipCount: 100, - })} -
-
-
-
-
-
+ + { + setIpError(false); + setIpSuccess(false); + setSubJobNumberRDS(e.detail.value); + }} + /> + + + +
+ {subnetItems.map((item) => { + return ( +
+ + + +
+ ); + })} +
+
+ + +
{t('settings.subnetDesc')}
+
+ +
+ {t('settings.estimateResult', { + ipCount: calculateEstimateRestIPs(), + })} +
+
+
+ {ipError && ( + {t('settings.estimateError')} + )} + {ipSuccess && ( + {t('settings.estimateSuccess')} + )} +
+
+ + )} + ); }; @@ -108,8 +242,8 @@ const SystemSetting: React.FC = () => { const breadcrumbItems = [ { text: t('breadcrumb.home'), href: RouterEnum.Home.path }, { - text: t('breadcrumb.dataSourceConnection'), - href: RouterEnum.DataSourceConnection.path, + text: t('nav.systemSettings'), + href: '', }, ]; return ( diff --git a/source/portal/src/pages/system-settings/typs/config-typs.ts b/source/portal/src/pages/system-settings/typs/config-typs.ts new file mode 100644 index 00000000..10abca8a --- /dev/null +++ b/source/portal/src/pages/system-settings/typs/config-typs.ts @@ -0,0 +1,10 @@ +export interface ConfigItem { + config_key: string; + config_value: string; +} + +export interface ConfigSubnet { + subnet_id: string; + name: string; + available_ip_address_count: number; +} From 8fc7a2a04ed619d0d7fc8726e56c6c885eea13eb Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Fri, 26 Jan 2024 15:35:04 +0800 Subject: [PATCH 021/112] fix: fix pending status color --- source/portal/src/pages/glue-job/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/portal/src/pages/glue-job/index.tsx b/source/portal/src/pages/glue-job/index.tsx index 9c899539..f37017f4 100644 --- a/source/portal/src/pages/glue-job/index.tsx +++ b/source/portal/src/pages/glue-job/index.tsx @@ -153,7 +153,7 @@ const GlueJobContent = () => { if (jobRowData.state === 'Active (idle)') { tempType = CLSAAIFIED_TYPE.SystemMark; } - if (jobRowData.state === 'Running') { + if (jobRowData.state === 'Running' || jobRowData.state === 'Pending') { tempType = CLSAAIFIED_TYPE.System; } if (jobRowData.state === 'Stopped') { From c81f85774b0f03deceb35cbba2ced32fef6eece3 Mon Sep 17 00:00:00 2001 From: Magic Chen Date: Sat, 27 Jan 2024 21:45:10 +0800 Subject: [PATCH 022/112] chore: add mysql and other in add jdbc --- .../portal/public/locales/en/datasource.json | 6 +- .../portal/public/locales/zh/datasource.json | 6 +- source/portal/src/index.scss | 43 +++-- .../componments/JDBCConnection.tsx | 154 +++++++++++++----- .../componments/JDBCConnectionEdit.tsx | 30 ++-- source/portal/src/ts/common.ts | 7 + 6 files changed, 180 insertions(+), 66 deletions(-) diff --git a/source/portal/public/locales/en/datasource.json b/source/portal/public/locales/en/datasource.json index cb0fc6e6..c31d21ff 100644 --- a/source/portal/public/locales/en/datasource.json +++ b/source/portal/public/locales/en/datasource.json @@ -90,7 +90,11 @@ "chooseSubnet": "Choose one subnet", "sg": "Security groups", "sgDesc": "Choose one or more security groups to allow access to the data store in your VPC subnet. Security groups are associated to the ENI attached to your subnet. You must choose at least one security group with a self-referencing inbound rule for all TCP ports.", - "chooseSG": "Choose one or more security groups" + "chooseSG": "Choose one or more security groups", + "mysql": "MySQL (Auto discovery)", + "other": "Others", + "otherError": "Other JDBC URL can not start with 'jdbc:mysql://'", + "databaseError": "JDBC Database can not be empty." 
}, "batch": { "name": "Batch Operation", diff --git a/source/portal/public/locales/zh/datasource.json b/source/portal/public/locales/zh/datasource.json index fe78a820..7ae6a3c9 100644 --- a/source/portal/public/locales/zh/datasource.json +++ b/source/portal/public/locales/zh/datasource.json @@ -90,7 +90,11 @@ "chooseSubnet": "选择一个子网", "sg": "安全组", "sgDesc": "选择一个或多个安全组以允许访问在你的 VPC 子网中的数据存储。安全组与你的子网关联的ENI相关联。你必须选择至少一个对所有 TCP 端口有自我引用入站规则的安全组。", - "chooseSG": "选择一个或多个安全组" + "chooseSG": "选择一个或多个安全组", + "mysql": "MySQL(自动发现)", + "other": "其他", + "otherError": "其他 JDBC URL 不能以 'jdbc:mysql://' 开头", + "databaseError": "JDBC 数据库不能为空。" }, "batch": { "name": "批量操作", diff --git a/source/portal/src/index.scss b/source/portal/src/index.scss index 5f7ef13e..f25e65da 100644 --- a/source/portal/src/index.scss +++ b/source/portal/src/index.scss @@ -1,13 +1,15 @@ body { margin: 0; - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", "Cantarell", "Fira Sans", - "Droid Sans", "Helvetica Neue", sans-serif; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale; } code { - font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", monospace; + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; } .hand-pointer { @@ -280,7 +282,13 @@ code { color: #3a3a3a; text-shadow: 0 1px 0 rgba(255, 255, 255, 0.75); background-color: #f7f7f7; - background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#d2d2d2)); + background-image: -webkit-gradient( + linear, + 0 0, + 0 100%, + from(#ffffff), + to(#d2d2d2) + ); background-image: -webkit-linear-gradient(top, #ffffff, #d2d2d2); background-image: -moz-linear-gradient(top, #ffffff, #d2d2d2); background-image: -ms-linear-gradient(top, #ffffff, #d2d2d2); @@ -313,7 +321,7 @@ code { .popover .arrow:after { z-index: -1; - content: ""; + content: ''; } .popover.top .arrow { @@ -379,20 +387,20 @@ code { } .horizon-bar-chart { - [class^="awsui_grid_"] { + [class^='awsui_grid_'] { display: none !important; } - [class^="awsui_labels-left_"] { + [class^='awsui_labels-left_'] { display: none !important; } - [class*="awsui_axis--emphasized_"] { + [class*='awsui_axis--emphasized_'] { display: none !important; } - [class^="awsui_chart-container__vertical_"] { - [class^="awsui_labels-bottom_"] { + [class^='awsui_chart-container__vertical_'] { + [class^='awsui_labels-bottom_'] { display: none !important; } @@ -471,7 +479,7 @@ code { .custom-badge { background-color: #d1d5db; color: #fff; - font-family: "Open Sans", "Helvetica Neue", Roboto, Arial, sans-serif; + font-family: 'Open Sans', 'Helvetica Neue', Roboto, Arial, sans-serif; font-size: 12px; letter-spacing: 0.005em; line-height: 22px; @@ -506,3 +514,16 @@ code { .add-jdbc-container { padding: 20px; } + +.jdbc-prefix { + // padding: 10px; + border: 2px solid #7d8998; + border-radius: 8px 0 0 8px; + position: relative; + z-index: 10; + font-size: 14px; + line-height: 22px; + margin-right: -8px; + background-color: #eee; + padding: 4px 8px 4px 12px; +} diff --git a/source/portal/src/pages/data-source-connection/componments/JDBCConnection.tsx b/source/portal/src/pages/data-source-connection/componments/JDBCConnection.tsx index fcbea762..98f58d71 100644 --- a/source/portal/src/pages/data-source-connection/componments/JDBCConnection.tsx +++ 
b/source/portal/src/pages/data-source-connection/componments/JDBCConnection.tsx @@ -25,8 +25,8 @@ import { import { useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { alertMsg } from 'tools/tools'; -import { i18ns } from '../types/s3_selector_config'; import { DropdownStatusProps } from '@cloudscape-design/components/internal/components/dropdown-status'; +import { checkJDBCIsMySQL } from 'ts/common'; interface JDBCConnectionProps { providerId: number; @@ -102,6 +102,11 @@ const JDBCConnection: React.FC = ( const [secretItem, setSecretItem] = useState(null); const [loadingJdbcDatabase, setLoadingJdbcDatabase] = useState(false); + const [jdbcConnType, setJdbcConnType] = useState('mysql'); + const [tmpJDBCUrl, setTmpJDBCUrl] = useState(''); + const [otherJDBCUrlError, setOtherJDBCUrlError] = useState(false); + const [jdbcDatabaseEmptyError, setJdbcDatabaseEmptyError] = useState(false); + useEffect(() => { if (credentialType === 'secret_manager') { loadAccountSecrets(); @@ -341,6 +346,14 @@ const JDBCConnection: React.FC = ( }; const addJdbcConnection = async () => { + if (jdbcConnType === 'other' && checkJDBCIsMySQL(tmpJDBCUrl)) { + setOtherJDBCUrlError(true); + return; + } + if (!jdbcConnectionData?.new?.jdbc_connection_schema?.trim()) { + setJdbcDatabaseEmptyError(true); + return; + } setLoadingImport(true); if (jdbcConnectionData.createType === 'import') { try { @@ -390,6 +403,14 @@ const JDBCConnection: React.FC = ( setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); }; + useEffect(() => { + let jdbcURLStr = tmpJDBCUrl; + if (jdbcConnType === 'mysql') { + jdbcURLStr = 'jdbc:mysql://' + tmpJDBCUrl; + } + changeJDBCUrl(jdbcURLStr); + }, [tmpJDBCUrl]); + const changeDatabase = (detail: any) => { // console.log(detail) let temp = jdbcConnectionData.new; @@ -461,39 +482,39 @@ const JDBCConnection: React.FC = ( setBuckets(res); }; - const changeJDBCcertificate = (detail: any) => { - let temp = jdbcConnectionData.new; - temp = { ...temp, custom_jdbc_cert: detail.resource.uri }; - setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); - }; - - const changeSkipCerValid = (detail: any) => { - // console.log("skip!!!",detail) - let temp = jdbcConnectionData.new; - temp = { - ...temp, - skip_custom_jdbc_cert_validation: detail ? 'true' : 'false', - }; - setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); - }; - - const changeJDBCCertString = (detail: any) => { - let temp = jdbcConnectionData.new; - temp = { ...temp, custom_jdbc_cert_string: detail }; - setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); - }; - - const changeDriverClassName = (detail: any) => { - let temp = jdbcConnectionData.new; - temp = { ...temp, jdbc_driver_class_name: detail }; - setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); - }; - - const changeDriverPath = (detail: any) => { - let temp = jdbcConnectionData.new; - temp = { ...temp, jdbc_driver_jar_uri: detail.resource.uri }; - setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); - }; + // const changeJDBCcertificate = (detail: any) => { + // let temp = jdbcConnectionData.new; + // temp = { ...temp, custom_jdbc_cert: detail.resource.uri }; + // setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); + // }; + + // const changeSkipCerValid = (detail: any) => { + // // console.log("skip!!!",detail) + // let temp = jdbcConnectionData.new; + // temp = { + // ...temp, + // skip_custom_jdbc_cert_validation: detail ? 
'true' : 'false', + // }; + // setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); + // }; + + // const changeJDBCCertString = (detail: any) => { + // let temp = jdbcConnectionData.new; + // temp = { ...temp, custom_jdbc_cert_string: detail }; + // setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); + // }; + + // const changeDriverClassName = (detail: any) => { + // let temp = jdbcConnectionData.new; + // temp = { ...temp, jdbc_driver_class_name: detail }; + // setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); + // }; + + // const changeDriverPath = (detail: any) => { + // let temp = jdbcConnectionData.new; + // temp = { ...temp, jdbc_driver_jar_uri: detail.resource.uri }; + // setJdbcConnectionData({ ...jdbcConnectionData, new: temp }); + // }; const changeUserName = (detail: any) => { let temp = jdbcConnectionData.new; @@ -515,6 +536,11 @@ const JDBCConnection: React.FC = ( }; const findDatabase = async () => { + if (jdbcConnType === 'other' && checkJDBCIsMySQL(tmpJDBCUrl)) { + setOtherJDBCUrlError(true); + return; + } + setLoadingImport(true); setLoadingJdbcDatabase(true); const requestParam = { @@ -734,18 +760,55 @@ const JDBCConnection: React.FC = ( value={jdbcConnectionData.new.description} /> + + + { + setOtherJDBCUrlError(false); + setTmpJDBCUrl(''); + changeDatabase(''); + setJdbcConnType(detail.value); + }} + value={jdbcConnType} + items={[ + { + label: t('datasource:jdbc.mysql'), + value: 'mysql', + }, + { label: t('datasource:jdbc.other'), value: 'other' }, + ]} + /> + + <> - changeJDBCUrl(e.detail.value)} - placeholder="jdbc:protocol://host:port" - value={jdbcConnectionData.new.jdbc_connection_url} - /> +
+ {jdbcConnType === 'mysql' && ( +
jdbc:mysql://
+ )} +
+ { + setOtherJDBCUrlError(false); + setTmpJDBCUrl(e.detail.value); + }} + placeholder={ + jdbcConnType === 'mysql' + ? 'host:port' + : 'jdbc:protocol://host:port' + } + value={tmpJDBCUrl} + /> +
+
{/* = ( { + changeDatabase(''); changeUserName(detail.value); }} /> @@ -829,6 +893,7 @@ const JDBCConnection: React.FC = ( type="password" value={jdbcConnectionData.new.password} onChange={({ detail }) => { + changeDatabase(''); changePassword(detail.value); }} /> @@ -844,6 +909,7 @@ const JDBCConnection: React.FC = ( props.providerId !== 1 && ( ) } + errorText={ + jdbcDatabaseEmptyError + ? t('datasource:jdbc.databaseError') + : '' + } >