|
#!/usr/bin/env python
| 2 | +"""Publish PubSub messages asking the worker to (re)process record. |
| 3 | +
|
| 4 | +Tip: Invoke via xargs to process multiple records from a file, |
| 5 | +e.g. cat records | xargs ... |
| 6 | +""" |
| 7 | + |
| 8 | +import argparse |
| 9 | +import time |
| 10 | +from google.cloud import pubsub_v1 |
| 11 | +from google.cloud import ndb |
| 12 | +import osv |
| 13 | +import requests |
| 14 | + |
| 15 | +DEFAULT_TIMEOUT = 60 |
| 16 | +PUBSUB_TOPIC_ID = "tasks" |
| 17 | + |
| 18 | + |
def publish_update_message(project_id, topic_id, source, path, original_sha256):
  """Publish a Pub/Sub "update" task message with the record details.

  The worker consumes only the message attributes; the data payload is
  deliberately empty.

  Args:
    project_id: The ID of the GCP project.
    topic_id: The ID of the Pub/Sub topic.
    source: The record source ID.
    path: The record path.
    original_sha256: The original SHA256 checksum of the record.
  """

  publisher = pubsub_v1.PublisherClient()
  topic_path = publisher.topic_path(project_id, topic_id)

  # publish() accepts the payload plus attributes as keyword arguments
  # directly, so there is no need to build an intermediate PubsubMessage
  # proto just to unpack it again.
  attributes = {
      "type": "update",
      "source": source,
      "path": path,
      "original_sha256": original_sha256,
      "deleted": "false",
      # Timestamp the request so repeated reimports of the same record are
      # distinguishable downstream.
      "req_timestamp": str(int(time.time())),
  }

  print(f'Publishing: {attributes}')
  future = publisher.publish(topic_path, b"", **attributes)
  print(f"Published message ID: {future.result()}")
| 49 | + |
| 50 | + |
def main():
  """Parse command-line arguments and publish reimport requests.

  Looks up the source repository in Datastore, fetches each requested
  record to compute its current SHA256, and publishes one "update"
  message per record.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      "--project_id", required=True, help="The Google Cloud project ID")
  parser.add_argument("--source", required=True, help="The record source ID")
  parser.add_argument(
      "--timeout",
      type=int,
      default=DEFAULT_TIMEOUT,
      help="Default timeout to use for operations")
  # nargs="+" already collects one or more IDs into a flat list; combining
  # it with action="append" would nest that list one level deeper and force
  # callers to index args.bugs[0].
  parser.add_argument("bugs", nargs="+", help="The bug IDs to operate on")

  args = parser.parse_args()

  datastore_client = ndb.Client(args.project_id)

  with datastore_client.context():
    source = osv.SourceRepository.get_by_id(args.source)
    if source is None:
      # Fail fast with a clear message instead of an AttributeError below.
      raise ValueError(f'Source repository not found: {args.source}')

    if source.type == osv.SourceRepositoryType.REST_ENDPOINT:
      for bug in args.bugs:
        record_url = f'{source.link}{bug}{source.extension}'
        path = f'{bug}{source.extension}'
        print(f'Trying: {record_url}')
        response = requests.get(record_url, timeout=args.timeout)
        try:
          response.raise_for_status()
        except requests.HTTPError as e:
          # Best effort: report the failed record and move on to the next.
          print(e)
          continue
        # Hash the canonical text so the worker can detect drift against
        # the currently imported copy.
        original_sha256 = osv.sha256_bytes(response.text.encode())
        publish_update_message(args.project_id, PUBSUB_TOPIC_ID, args.source,
                               path, original_sha256)

    if source.type == osv.SourceRepositoryType.GIT:
      raise NotImplementedError()

    if source.type == osv.SourceRepositoryType.BUCKET:
      raise NotImplementedError("Use reimport_gcs_record.py for now")
| 92 | + |
| 93 | + |
# Only run when executed as a script, so the module stays importable
# without side effects.
if __name__ == "__main__":
  main()
0 commit comments