This repository has been archived by the owner on May 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 274
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
935 additions
and
919 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# EditorConfig spec for a consistent cross-editor style. | ||
# Read more: https://EditorConfig.org | ||
|
||
root = true | ||
|
||
[*] | ||
# Unix-style newlines with a newline ending every file | ||
end_of_line = lf | ||
insert_final_newline = true | ||
trim_trailing_whitespace = true | ||
# 4 space indentation | ||
indent_style = space | ||
indent_size = 4 | ||
|
||
[*.{md,py}] | ||
charset = utf-8 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
from typing import Tuple | ||
|
||
from .database import connect_to_uri | ||
from .diff_tables import TableSegment, TableDiffer | ||
|
||
|
||
def create_source(db_uri: str, table_name: str, key_column: str, extra_columns: Tuple[str, ...] = ()):
    """Connect to a database and wrap one of its tables in a TableSegment.

    Args:
        db_uri: Connection URI handed to ``connect_to_uri``.
        table_name: Name of the table; passed on as a one-element path tuple.
        key_column: Name of the key column.
        extra_columns: Additional column names; copied into a tuple.

    Returns:
        A ``TableSegment`` for the requested table.
    """
    db = connect_to_uri(db_uri)
    # The fourth positional slot of TableSegment is the update column, so it
    # is filled with None here and the extra columns go in the fifth slot.
    # Previously the extra columns were passed in the update-column position.
    # NOTE(review): positional order inferred from the CLI's TableSegment
    # call (db, path, key_column, update_column, columns) -- confirm against
    # the TableSegment definition.
    return TableSegment(db, (table_name,), key_column, None, tuple(extra_columns))
from typing import Tuple | ||
|
||
from .database import connect_to_uri | ||
from .diff_tables import TableSegment, TableDiffer | ||
|
||
|
||
def create_source(db_uri: str, table_name: str, key_column: str, extra_columns: Tuple[str, ...] = ()):
    """Connect to a database and wrap one of its tables in a TableSegment.

    Args:
        db_uri: Connection URI handed to ``connect_to_uri``.
        table_name: Name of the table; passed on as a one-element path tuple.
        key_column: Name of the key column.
        extra_columns: Additional column names; copied into a tuple.

    Returns:
        A ``TableSegment`` for the requested table.
    """
    db = connect_to_uri(db_uri)
    # The fourth positional slot of TableSegment is the update column, so it
    # is filled with None here and the extra columns go in the fifth slot.
    # Previously the extra columns were passed in the update-column position.
    # NOTE(review): positional order inferred from the CLI's TableSegment
    # call (db, path, key_column, update_column, columns) -- confirm against
    # the TableSegment definition.
    return TableSegment(db, (table_name,), key_column, None, tuple(extra_columns))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,115 +1,115 @@ | ||
from multiprocessing.sharedctypes import Value | ||
import sys | ||
import time | ||
import logging | ||
from itertools import islice | ||
|
||
from .diff_tables import TableSegment, TableDiffer | ||
from .database import connect_to_uri | ||
from .parse_time import parse_time_before_now, UNITS_STR, ParseError | ||
|
||
import click | ||
|
||
# Format strings handed to logging.basicConfig when --debug/--verbose is set.
LOG_FORMAT = "[%(asctime)s] %(levelname)s - %(message)s"
DATE_FORMAT = "%H:%M:%S"


@click.command()
@click.argument("db1_uri")
@click.argument("table1_name")
@click.argument("db2_uri")
@click.argument("table2_name")
@click.option("-k", "--key-column", default="id", help="Name of primary key column")
@click.option("-t", "--update-column", default=None, help="Name of updated_at/last_updated column")
@click.option("-c", "--columns", default=[], multiple=True, help="Names of extra columns to compare")
@click.option(
    "-l",
    "--limit",
    default=None,
    # Validate at parse time so a bad value is reported as a usage error
    # before any database connection is opened.
    type=click.IntRange(min=0),
    help="Maximum number of differences to find",
)
@click.option("--bisection-factor", default=32, help="Segments per iteration")
@click.option("--bisection-threshold", default=1024**2, help="Minimal bisection threshold")
@click.option(
    "--min-age",
    default=None,
    help="Considers only rows older than specified. "
    "Example: --min-age=5min ignores rows from the last 5 minutes. "
    f"\nValid units: {UNITS_STR}",
)
@click.option("--max-age", default=None, help="Considers only rows younger than specified. See --min-age.")
@click.option("-s", "--stats", is_flag=True, help="Print stats instead of a detailed diff")
@click.option("-d", "--debug", is_flag=True, help="Print debug info")
@click.option("-v", "--verbose", is_flag=True, help="Print extra info")
@click.option("-i", "--interactive", is_flag=True, help="Confirm queries, implies --debug")
def main(
    db1_uri,
    table1_name,
    db2_uri,
    table2_name,
    key_column,
    update_column,
    columns,
    limit,
    bisection_factor,
    bisection_threshold,
    min_age,
    max_age,
    stats,
    debug,
    verbose,
    interactive,
):
    """Print the differences between two tables, or summary statistics with --stats."""
    # The docstring above is kept to one line because click shows it as the
    # command's help text.

    # `limit` is an int (or None) after click's validation; compare against
    # None so that an explicit `--limit 0` is still treated as "limit given".
    if limit is not None and stats:
        print("Error: cannot specify a limit when using the -s/--stats switch")
        return
    if interactive:
        debug = True

    if debug:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
    elif verbose:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)

    db1 = connect_to_uri(db1_uri)
    db2 = connect_to_uri(db2_uri)

    if interactive:
        db1.enable_interactive()
        db2.enable_interactive()

    start = time.time()

    try:
        # `x and f(x)` leaves an unset (None) age as None instead of parsing it.
        options = dict(
            min_time=min_age and parse_time_before_now(min_age),
            max_time=max_age and parse_time_before_now(max_age),
        )
    except ParseError as e:
        logging.error("Error while parsing age expression: %s", e)
        return

    table1 = TableSegment(db1, (table1_name,), key_column, update_column, columns, **options)
    table2 = TableSegment(db2, (table2_name,), key_column, update_column, columns, **options)

    differ = TableDiffer(bisection_factor=bisection_factor, bisection_threshold=bisection_threshold, debug=debug)
    diff_iter = differ.diff_tables(table1, table2)

    if limit is not None:
        diff_iter = islice(diff_iter, limit)

    if stats:
        diff = list(diff_iter)
        # Read the row count once and reuse it for both output lines.
        # NOTE(review): `count` may run a query on each access -- confirm.
        total_rows = table1.count
        unique_diff_count = len({key[0] for _, key in diff})
        # Guard against an empty first table, which previously raised
        # ZeroDivisionError; the percentage is reported as 0 in that case.
        percent = 100 * unique_diff_count / total_rows if total_rows else 0.0
        print(f"Diff-Total: {len(diff)} changed rows out of {total_rows}")
        print(f"Diff-Percent: {percent:.4f}%")
        plus = sum(1 for op, _ in diff if op == "+")
        minus = sum(1 for op, _ in diff if op == "-")
        print(f"Diff-Split: +{plus} -{minus}")
    else:
        for op, key in diff_iter:
            print(op, key)
            # Flush per row so results stream out while the diff is running.
            sys.stdout.flush()

    end = time.time()

    logging.info("Duration: %.2f seconds.", end - start)


if __name__ == "__main__":
    main()
from multiprocessing.sharedctypes import Value | ||
import sys | ||
import time | ||
import logging | ||
from itertools import islice | ||
|
||
from .diff_tables import TableSegment, TableDiffer | ||
from .database import connect_to_uri | ||
from .parse_time import parse_time_before_now, UNITS_STR, ParseError | ||
|
||
import click | ||
|
||
# Format strings handed to logging.basicConfig when --debug/--verbose is set.
LOG_FORMAT = "[%(asctime)s] %(levelname)s - %(message)s"
DATE_FORMAT = "%H:%M:%S"


@click.command()
@click.argument("db1_uri")
@click.argument("table1_name")
@click.argument("db2_uri")
@click.argument("table2_name")
@click.option("-k", "--key-column", default="id", help="Name of primary key column")
@click.option("-t", "--update-column", default=None, help="Name of updated_at/last_updated column")
@click.option("-c", "--columns", default=[], multiple=True, help="Names of extra columns to compare")
@click.option(
    "-l",
    "--limit",
    default=None,
    # Validate at parse time so a bad value is reported as a usage error
    # before any database connection is opened.
    type=click.IntRange(min=0),
    help="Maximum number of differences to find",
)
@click.option("--bisection-factor", default=32, help="Segments per iteration")
@click.option("--bisection-threshold", default=1024**2, help="Minimal bisection threshold")
@click.option(
    "--min-age",
    default=None,
    help="Considers only rows older than specified. "
    "Example: --min-age=5min ignores rows from the last 5 minutes. "
    f"\nValid units: {UNITS_STR}",
)
@click.option("--max-age", default=None, help="Considers only rows younger than specified. See --min-age.")
@click.option("-s", "--stats", is_flag=True, help="Print stats instead of a detailed diff")
@click.option("-d", "--debug", is_flag=True, help="Print debug info")
@click.option("-v", "--verbose", is_flag=True, help="Print extra info")
@click.option("-i", "--interactive", is_flag=True, help="Confirm queries, implies --debug")
def main(
    db1_uri,
    table1_name,
    db2_uri,
    table2_name,
    key_column,
    update_column,
    columns,
    limit,
    bisection_factor,
    bisection_threshold,
    min_age,
    max_age,
    stats,
    debug,
    verbose,
    interactive,
):
    """Print the differences between two tables, or summary statistics with --stats."""
    # The docstring above is kept to one line because click shows it as the
    # command's help text.

    # `limit` is an int (or None) after click's validation; compare against
    # None so that an explicit `--limit 0` is still treated as "limit given".
    if limit is not None and stats:
        print("Error: cannot specify a limit when using the -s/--stats switch")
        return
    if interactive:
        debug = True

    if debug:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
    elif verbose:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)

    db1 = connect_to_uri(db1_uri)
    db2 = connect_to_uri(db2_uri)

    if interactive:
        db1.enable_interactive()
        db2.enable_interactive()

    start = time.time()

    try:
        # `x and f(x)` leaves an unset (None) age as None instead of parsing it.
        options = dict(
            min_time=min_age and parse_time_before_now(min_age),
            max_time=max_age and parse_time_before_now(max_age),
        )
    except ParseError as e:
        logging.error("Error while parsing age expression: %s", e)
        return

    table1 = TableSegment(db1, (table1_name,), key_column, update_column, columns, **options)
    table2 = TableSegment(db2, (table2_name,), key_column, update_column, columns, **options)

    differ = TableDiffer(bisection_factor=bisection_factor, bisection_threshold=bisection_threshold, debug=debug)
    diff_iter = differ.diff_tables(table1, table2)

    if limit is not None:
        diff_iter = islice(diff_iter, limit)

    if stats:
        diff = list(diff_iter)
        # Read the row count once and reuse it for both output lines.
        # NOTE(review): `count` may run a query on each access -- confirm.
        total_rows = table1.count
        unique_diff_count = len({key[0] for _, key in diff})
        # Guard against an empty first table, which previously raised
        # ZeroDivisionError; the percentage is reported as 0 in that case.
        percent = 100 * unique_diff_count / total_rows if total_rows else 0.0
        print(f"Diff-Total: {len(diff)} changed rows out of {total_rows}")
        print(f"Diff-Percent: {percent:.4f}%")
        plus = sum(1 for op, _ in diff if op == "+")
        minus = sum(1 for op, _ in diff if op == "-")
        print(f"Diff-Split: +{plus} -{minus}")
    else:
        for op, key in diff_iter:
            print(op, key)
            # Flush per row so results stream out while the diff is running.
            sys.stdout.flush()

    end = time.time()

    logging.info("Duration: %.2f seconds.", end - start)


if __name__ == "__main__":
    main()
Oops, something went wrong.