Skip to content

Commit

Permalink
Upload to pip fix command line interface
Browse files Browse the repository at this point in the history
  • Loading branch information
AbdelrhmanBassiouny committed Jul 1, 2024
1 parent 4503e5f commit cfc580b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 92 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dependencies = ["ipython==8.12.0",
requires-python = ">=3.8"

[project.scripts]
my_command = "neem_to_sql.migrate_neems_to_sql:main"
my_command = "neems_to_sql.neems_to_sql:main"

[project.urls]
Homepage = "https://github.com/AbdelrhmanBassiouny/neem_to_sql"
92 changes: 2 additions & 90 deletions src/mongo_vs_sql_query.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,5 @@
from time import time
from neems_to_sql import main

from neems_to_sql import mongo_collection_to_list_of_dicts, parse_arguments, \
get_mongo_uri, connect_to_mongo_and_get_client, filter_and_select_neems_in_batches, filter_neems
from neems_to_sql.logger import CustomLogger, logging

# Script body: times a MongoDB aggregation over every visible NEEM's
# "<neem_id>_triples" collection and logs total/average timings.
# NOTE(review): the diff header for this file reports 2 additions and 90
# deletions, so the benchmark below is presumably the removed code and the
# trailing main() call (with its import) the added code -- confirm against
# the repository before relying on either part.
if __name__ == "__main__":

    # Debug-level logger created with the name "MONGO_VS_SQL_QUERY" and the
    # file name "mongo_vs_sql_query.txt".
    LOGGER = CustomLogger("MONGO_VS_SQL_QUERY",
                          "mongo_vs_sql_query.txt",
                          logging.DEBUG, reset_handlers=True).get_logger()

    # An explicit --mongo_uri style argument wins; otherwise the URI is
    # assembled from the individual username/password/host/port/database args.
    args = parse_arguments()
    if args.mongo_uri is not None:
        MONGODB_URI = args.mongo_uri
    else:
        MONGODB_URI = get_mongo_uri(args.mongo_username, args.mongo_password, args.mongo_host,
                                    args.mongo_port, args.mongo_database)
    # NOTE(review): the original comment here said "set a 5-second connection
    # timeout"; no timeout argument is visible at this call, so presumably the
    # helper applies it -- confirm.
    mongo_client = connect_to_mongo_and_get_client(MONGODB_URI)
    db = mongo_client.neems

    # Get neem ids from the "meta" collection.
    meta = db.meta
    meta_lod = mongo_collection_to_list_of_dicts(meta)
    # Presumably keeps only entries whose 'visibility' field is True -- verify
    # against filter_neems.
    meta_lod = filter_neems(meta_lod, {'visibility': True})
    if len(meta_lod) == 0:
        LOGGER.error("NO NEEMS FOUND (Probably no meta data collection OR no neems with the given filters)")
        raise ValueError("NO NEEMS FOUND (Probably no meta data collection OR no neems with the given filters)")
    neem_ids = [doc['_id'] for doc in meta_lod]
    LOGGER.debug(f"NEEM IDS: {neem_ids}")

    # Per-neem timing buckets; each list gets one entry per loop iteration.
    total_time = 0  # never updated or read below
    all_docs = []
    get_collection_time = []
    single_query_time = []
    append_time = []
    total_per_neem_time = []
    for neem_id in neem_ids:
        # Phase 1: obtain the collection handle.
        start = time()
        triples = db.get_collection(f"{neem_id}_triples")
        get_collection_time.append(time() - start)
        # Phase 2: issue the aggregation and obtain a cursor.
        start = time()
        cursor = triples.aggregate([
            # Select triples whose predicate is DUL executesTask, or that are
            # rdf:type triples with object SOMA Gripping.
            {"$match": {"$or": [{"p": "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#executesTask"},
                                {'p': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
                                 'o': 'http://www.ease-crc.org/ont/SOMA.owl#Gripping'}]}},
            # Self-join on the same collection: this triple's object ("o")
            # against other triples' subject ("s"); matches are stored under
            # a field named after the neem id.
            {
                "$lookup":
                    {
                        "from": f"{neem_id}_triples",
                        "localField": "o",
                        "foreignField": "s",
                        "as": f"{neem_id}"
                    }
            },
            # One output document per joined triple.
            {
                "$unwind": f"${neem_id}"
            },
            # Exclude the listed fields from the output.
            # NOTE(review): "p*" / "o*" look like wildcard attempts; $project
            # field names are presumably taken literally, so these entries
            # may exclude nothing -- confirm intent.
            {
                "$project": {
                    f"{neem_id}.p*": 0,
                    f"{neem_id}._id": 0,
                    f"{neem_id}.graph": 0,
                    f"{neem_id}.scope": 0,
                    f"{neem_id}.o*": 0,
                    "p*": 0,
                    "o*": 0,
                    "_id": 0,
                    "graph": 0,
                    "scope": 0
                }
            },
            # Keep only rows whose joined triple is an rdf:type SOMA Gripping.
            {"$match": {f'{neem_id}.p': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
                        f'{neem_id}.o': 'http://www.ease-crc.org/ont/SOMA.owl#Gripping'}}
        ])
        single_query_time.append(time() - start)
        # Phase 3: drain the cursor into the accumulated result list.
        start = time()
        all_docs.extend([doc for doc in cursor])
        append_time.append(time() - start)
        total_per_neem_time.append(get_collection_time[-1] + single_query_time[-1] + append_time[-1])

    # Report totals, then per-neem averages. len(neem_ids) cannot be zero
    # here because an empty meta_lod raised ValueError above.
    LOGGER.info(f"ALL DOCS: {all_docs}")
    LOGGER.info(f"Total time: {sum(total_per_neem_time)}")
    LOGGER.info(f"Total get collection time: {sum(get_collection_time)}")
    LOGGER.info(f"Total single query time: {sum(single_query_time)}")
    LOGGER.info(f"Total append time: {sum(append_time)}")
    LOGGER.info(f"Avg per neem time: {sum(total_per_neem_time) / len(neem_ids)}")
    LOGGER.info(f"Avg get collection time: {sum(get_collection_time) / len(neem_ids)}")
    LOGGER.info(f"Avg single query time: {sum(single_query_time) / len(neem_ids)}")
    LOGGER.info(f"Avg append time: {sum(append_time) / len(neem_ids)}")
    LOGGER.info(f"Total number of documents: {len(all_docs)}")
    # Entry point imported from neems_to_sql.
    main()
44 changes: 43 additions & 1 deletion src/neems_to_sql/neems_to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from orderedset import OrderedSet
from pymongo import MongoClient
from pymongo.collection import Collection
from sqlalchemy import text
from sqlalchemy import text, create_engine
from sqlalchemy.engine import Engine, Connection
from sqlalchemy.exc import SQLAlchemyError
from tqdm import tqdm
Expand Down Expand Up @@ -2133,3 +2133,45 @@ def get_neem_filters_from_yaml(neem_filters_yaml: Optional[str] = None) -> dict:
else:
filters = None
return filters


def main() -> None:
    """Run the NEEM MongoDB-to-SQL migration from the command line.

    Parses the command line arguments, sets the logging level, loads the
    optional NEEM filters from a YAML file, connects to MongoDB, creates the
    SQLAlchemy engine, and hands both connections to
    ``get_mongo_neems_and_put_into_sql_database``.

    For both databases an explicitly supplied URI takes precedence; otherwise
    the URI is built from the individual credential/host arguments.

    The Mongo client and the SQL engine are always released on exit, even
    when the migration raises, so that callers importing and invoking
    ``main()`` from another script do not leak connections.
    """
    args = parse_arguments()

    set_logging_level(args.log_level)

    neem_filters = get_neem_filters_from_yaml(args.neem_filters_yaml)

    # Prefer a full connection string; fall back to assembling one.
    if args.mongo_uri is not None:
        mongo_uri = args.mongo_uri
    else:
        mongo_uri = get_mongo_uri(args.mongo_username, args.mongo_password, args.mongo_host,
                                  args.mongo_port, args.mongo_database)
    mongo_client = connect_to_mongo_and_get_client(mongo_uri)

    try:
        # Same precedence rule for the SQL side.
        if args.sql_uri is not None:
            sql_uri = args.sql_uri
        else:
            sql_uri = get_sql_uri(args.sql_username, args.sql_password, args.sql_host, args.sql_database)
        sql_engine = create_engine(sql_uri, future=True)

        try:
            get_mongo_neems_and_put_into_sql_database(sql_engine, mongo_client,
                                                      drop_neems=args.drop_neems,
                                                      drop_tables=args.drop_tables,
                                                      allow_increasing_sz=args.allow_increasing_sz,
                                                      allow_text_indexing=args.allow_text_indexing,
                                                      max_null_percentage=args.max_null_percentage,
                                                      skip_bad_triples=args.skip_bad_triples,
                                                      neem_filters=neem_filters,
                                                      batch_size=args.batch_size,
                                                      number_of_batches=args.number_of_batches,
                                                      start_batch=args.start_batch,
                                                      dump_data_stats=args.dump_data_stats)
        finally:
            # Return pooled SQL connections to the database server.
            sql_engine.dispose()
    finally:
        # Close MongoDB sockets regardless of how the migration ended.
        mongo_client.close()


# Run the migration only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 comments on commit cfc580b

Please sign in to comment.