Commit

Added project files
Kaszanas committed Aug 27, 2021
1 parent d4d3138 commit b37590c
Showing 5 changed files with 160 additions and 1 deletion.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

-Copyright (c) 2021 Kaszanas
+Copyright (c) 2021 Andrzej Białecki

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
55 changes: 55 additions & 0 deletions README.md
@@ -0,0 +1,55 @@
# SC2DatasetPreparator

This repository contains tools that can be used to perform the following steps:

1. Using ```src/directory_flattener.py```, flatten the directory structure and save the old directory tree to a mapping of ```{"replayUniqueHash": "whereItWasInOldStructure"}``` (an example mapping is shown after this list).
2. Using ```src/sc2_replaypack_processor.py```, perform replaypack processing with https://github.com/Kaszanas/SC2InfoExtractorGo
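
The mapping is written to ```processed_mapping.json``` inside the ```<input_path>_processed``` output directory. A minimal illustration of its contents is shown below; the hash and the path are made up purely for illustration:

```
{
  "3f2b8c4d9e6a41d2a7c5e1f0b9d8c7a6": "TournamentPack/Group_A/replay_01.SC2Replay"
}
```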

## Customization

To specify different processing flags for https://github.com/Kaszanas/SC2InfoExtractorGo, please modify ```src/sc2_replaypack_processor.py``` directly, as sketched below.
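
For example, the processing flags live in the argument list passed to ```subprocess.run``` inside ```multiprocessing_client()```. The sketch below mirrors that call and flips a single flag; the ```true``` value for ```-perform_anonymization``` is an assumption about what SC2InfoExtractorGo accepts, and the ```-localized_maps_file``` path is the machine-specific one from the script and will need adjusting:

```
import subprocess

def run_extractor(directory: str, output_directory_filepath: str) -> None:
    # Sketch of the call made in multiprocessing_client(); only
    # -perform_anonymization is flipped here to show where a flag would change.
    subprocess.run(["GoSC2Science.exe",
                    f"-input={directory}/",
                    f"-output={output_directory_filepath}/",
                    "-integrity_check=false",
                    "-validity_check=false",
                    "-number_of_packages=1",
                    "-game_mode=0b1111111111",
                    "-localized_maps_file=F:\\Projects\\EsportDataset\\processed\\program\\new_maps_processed.json",  # adjust to a local path
                    "-perform_anonymization=true",  # changed from "false"
                    "-localize_maps=true",
                    "-with_multiprocessing=false",
                    "-log_level=3",
                    f"-log_dir={output_directory_filepath}/"])
```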

## Usage

Before using this software, please install Python >= 3.7 and the dependencies listed in ```requirements.txt```.

Please keep in mind that ```src/directory_flattener.py``` does not provide default flag values and is configured with the following command-line flags:

```
usage: directory_flattener.py [-h] [--input_path INPUT_PATH]
                              [--file_extension FILE_EXTENSION]

Directory restructuring tool used in order to flatten the structure, map the
old structure to a separate file, and for later processing with other tools.

optional arguments:
  -h, --help            show this help message and exit
  --input_path INPUT_PATH
                        Please provide input path to the dataset that is going
                        to be processed.
  --file_extension FILE_EXTENSION
                        Please provide a file extension for files that will be
                        moved and renamed.
```
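
An example invocation is shown below; the input path and the ```.SC2Replay``` extension are placeholders and should be adjusted to your dataset:

```
python src/directory_flattener.py --input_path ./replaypack_dataset --file_extension .SC2Replay
```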


Please keep in mind that ```src/sc2_replaypack_processor.py``` does not provide default flag values and is configured with the following command-line flags:

```
usage: sc2_replaypack_processor.py [-h] [--input_dir INPUT_DIR]
                                   [--output_dir OUTPUT_DIR]
                                   [--number_of_processes NUMBER_OF_PROCESSES]

Tool used for processing SC2 datasets with
https://github.com/Kaszanas/SC2InfoExtractorGo

optional arguments:
  -h, --help            show this help message and exit
  --input_dir INPUT_DIR
                        Please provide input path to the directory containing
                        the dataset that is going to be processed.
  --output_dir OUTPUT_DIR
                        Please provide an output directory for the resulting
                        files.
  --number_of_processes NUMBER_OF_PROCESSES
                        Please provide the number of processes to be spawned
                        for the dataset processing.
```
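
An example invocation, again with placeholder directories:

```
python src/sc2_replaypack_processor.py --input_dir ./replaypack_dataset_processed --output_dir ./extracted_dataset --number_of_processes 4
```

Note that the script shells out to ```GoSC2Science.exe```, so the SC2InfoExtractorGo binary has to be available in the working directory or on PATH, and the hard-coded ```-localized_maps_file``` path inside the script may need to be adjusted.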

# Citation
Binary file added requirements.txt
Binary file not shown.
45 changes: 45 additions & 0 deletions src/directory_flattener.py
@@ -0,0 +1,45 @@
import os
import argparse
import uuid
import json
import glob
import shutil

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Directory restructuring tool used in order to flatten the structure, map the old structure to a separate file, and for later processing with other tools.")
    parser.add_argument("--input_path", help="Please provide input path to the dataset that is going to be processed.")
    parser.add_argument("--file_extension", help="Please provide a file extension for files that will be moved and renamed.")
    args = parser.parse_args()

    dir_structure_mapping = {}

    # Create the output directory next to the input directory if it doesn't exist:
    new_root_directory = args.input_path + "_processed"
    if not os.path.exists(new_root_directory):
        os.makedirs(new_root_directory)

    # Iterate over the supplied directory:
    for root, _, filenames in os.walk(args.input_path):
        # Performing an action for every file that was detected:
        for file in filenames:
            if file.endswith(args.file_extension):

                # Prepare relative paths:
                relative_dir = os.path.relpath(root, args.input_path)
                relative_file = os.path.join(relative_dir, file)

                # Get a unique filename:
                unique_filename = uuid.uuid4().hex

                # Copying and renaming files:
                current_file = os.path.join(root, file)
                new_path_and_filename = os.path.join(new_root_directory, unique_filename + args.file_extension)
                shutil.copy(current_file, new_path_and_filename)

                # Add to the mapping of unique filename -> original relative path:
                dir_structure_mapping[unique_filename] = relative_file

    # Persist the mapping next to the flattened files:
    with open(os.path.join(new_root_directory, "processed_mapping.json"), "w") as json_file:
        json.dump(dir_structure_mapping, json_file)
59 changes: 59 additions & 0 deletions src/sc2_replaypack_processor.py
@@ -0,0 +1,59 @@
import os
import argparse
import subprocess
from tqdm import tqdm
from multiprocessing import Pool


def multiprocessing_scheduler(processing_arguments, number_of_processes):
    # Distribute (input directory, output directory) pairs among worker processes.
    # With processes=None, Pool falls back to os.cpu_count().
    with Pool(processes=number_of_processes) as pool:
        pool.imap_unordered(multiprocessing_client, processing_arguments)
        pool.close()
        pool.join()


def multiprocessing_client(arguments: tuple):

    directory, output_directory_filepath = arguments

    # Run the external SC2InfoExtractorGo binary on a single replaypack directory:
    subprocess.run(["GoSC2Science.exe",
                    f"-input={directory}/",
                    f"-output={output_directory_filepath}/",
                    "-integrity_check=false",
                    "-validity_check=false",
                    "-number_of_packages=1",
                    "-game_mode=0b1111111111",
                    "-localized_maps_file=F:\\Projects\\EsportDataset\\processed\\program\\new_maps_processed.json",
                    "-perform_anonymization=false",
                    "-localize_maps=true",
                    "-with_multiprocessing=false",
                    "-log_level=3",
                    f"-log_dir={output_directory_filepath}/"])


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Tool used for processing SC2 datasets with https://github.com/Kaszanas/SC2InfoExtractorGo")
    parser.add_argument("--input_dir", help="Please provide input path to the directory containing the dataset that is going to be processed.")
    parser.add_argument("--output_dir", help="Please provide an output directory for the resulting files.")
    parser.add_argument("--number_of_processes", type=int, help="Please provide the number of processes to be spawned for the dataset processing.")
    args = parser.parse_args()

    # Create the main output directory:
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    multiprocessing_list = []
    for directory, _, _ in tqdm(os.walk(args.input_dir)):

        output_directory_name = os.path.basename(directory)
        if output_directory_name == "input":
            continue

        output_directory_filepath = os.path.join(args.output_dir, output_directory_name)

        # Create the output subdirectories:
        if not os.path.exists(output_directory_filepath):
            os.mkdir(output_directory_filepath)

        multiprocessing_list.append((directory, output_directory_filepath))

    multiprocessing_scheduler(multiprocessing_list, args.number_of_processes)
