KnowledgeLinks
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎kldgraph/__pycache__/__init__.cpython-36.pyc
-147 Bytes b/‎kldgraph/__pycache__/__init__.cpython-36.pyc
-147 Bytes
diff --git a/‎kldgraph/__pycache__/dgraphapi.cpython-36.pyc
-254 Bytes b/‎kldgraph/__pycache__/dgraphapi.cpython-36.pyc
-254 Bytes
diff --git a/‎kldgraph/__pycache__/ntparse.cpython-36.pyc
-2.1 KB b/‎kldgraph/__pycache__/ntparse.cpython-36.pyc
-2.1 KB
diff --git a/‎kldgraph/__pycache__/rdfuri.cpython-36.pyc
-3.85 KB b/‎kldgraph/__pycache__/rdfuri.cpython-36.pyc
-3.85 KB
diff --git a/‎kldgraph/batcher.py
Lines changed: 35 additions & 0 deletions b/‎kldgraph/batcher.py
Lines changed: 35 additions & 0 deletions
diff --git a/‎kldgraph/consumer.py
Lines changed: 21 additions & 0 deletions b/‎kldgraph/consumer.py
Lines changed: 21 additions & 0 deletions
diff --git a/‎kldgraph/dataset.py
Lines changed: 12 additions & 0 deletions b/‎kldgraph/dataset.py
Lines changed: 12 additions & 0 deletions
diff --git a/‎kldgraph/dgraphapi.py
Lines changed: 76 additions & 64 deletions b/‎kldgraph/dgraphapi.py
Lines changed: 76 additions & 64 deletions
diff --git a/‎kldgraph/ntparse.py
Lines changed: 20 additions & 2 deletions b/‎kldgraph/ntparse.py
Lines changed: 20 additions & 2 deletions
@@ -1,2 +1,3 @@
 /venv
 /.idea
+*__pycache__*
@@ -0,0 +1,35 @@
+from kldgraph import rdfuri, consumer
+
+
+class BatchProcessor:
+    """
+    Counts processed items and tells the batch consumer to send the data each time the count reaches the
+    batch size.
+    """
+
+    def __init__(self, dataset, batch_consumer=None, batch_size=10000):
+        """
+        :param dataset: the dataset handed to the default consumer.BatchConsumer
+        :param batch_consumer: object whose send() is called when a batch is full; when falsy a
+                               consumer.BatchConsumer wrapping dataset is created
+        :param batch_size: number of increments that triggers a send
+        """
+        self.dataset = dataset
+        self.batch_size = batch_size
+        # items counted since the last send; reset to 0 by test_size_send
+        self.count = 0
+        # items counted since this processor was created; not reset by this class
+        self.total_count = 0
+        self.batch_consumer = batch_consumer if batch_consumer else consumer.BatchConsumer(self.dataset)
+
+    def increment(self):
+        """
+        increments both counters and then checks whether the batch size has been reached by calling
+        test_size_send
+
+        :return: None
+        """
+        self.count += 1
+        self.total_count += 1
+        self.test_size_send()
+
+    def test_size_send(self):
+        """
+        tests whether the count has reached the batch size; if it has, tells the batch_consumer to send and
+        resets the count to 0.
+
+        :return: None
+        """
+        if self.count >= self.batch_size:
+            self.batch_consumer.send()
+            self.count = 0
@@ -0,0 +1,21 @@
+from kldgraph import dgraphapi, rdfuri
+
+
+class BatchConsumer:
+    """
+    consumes a dataset by passing it to a destination callable
+    """
+
+    def __init__(self, dataset, destination=None):
+        """
+        :param dataset: object passed to the destination and cleared after each send
+        :param destination: callable that accepts the dataset; dgraphapi.mutate_add_dataset is used when falsy
+        """
+        self.dataset = dataset
+        self.destination = destination if destination else dgraphapi.mutate_add_dataset
+
+    def send(self):
+        """
+        sends the dataset to the destination, then clears the dataset and the rdfuri.Node registries
+        :return: the value returned by the destination callable
+        """
+        result = self.destination(self.dataset)
+        # the clean-up below is only reached when the destination call did not raise
+        self.dataset.clear()
+        rdfuri.Node.clear_all_registries()
+        return result
@@ -55,3 +55,15 @@ def are_nodes_set(self):
                     except AttributeError:
                         pass
         return True
+
+    def clear(self):
+        """
+        cycles through the dictionary object and clears every nested container before clearing the object
+        itself, to ensure all circular references are removed
+        :return: None
+        """
+        for subj, pred_ref in self.items():
+            for pred in pred_ref.keys():
+                # innermost level: the container stored for each subject/predicate pair
+                self[subj][pred].clear()
+            # middle level: the predicate mapping of this subject
+            self[subj].clear()
+        # outermost level: the subject entries themselves
+        super().clear()
+
@@ -1,81 +1,93 @@
 import json
 import pydgraph
 
-client_stub = pydgraph.DgraphClientStub('localhost:9080')
-client = pydgraph.DgraphClient(client_stub)
+DEFAULT_URL = 'localhost:9080'
 
 
 class XidDoesNotExistError(Exception):
     pass
 
 
-def get_uid_for_xid(xid):
-    try:
-        return find_uid_for_xid(xid)
-    except XidDoesNotExistError:
-        return create_uid_for_xid(xid)
+class Api:
 
+    def __init__(self, url=DEFAULT_URL):
+        """
+        creates the pydgraph client stub and client for the supplied url
+        :param url: address passed to pydgraph.DgraphClientStub; defaults to DEFAULT_URL
+        """
+        self.url = url
+        self.client_stub = pydgraph.DgraphClientStub(url)
+        self.client = pydgraph.DgraphClient(self.client_stub)
 
-def find_uid_for_xid(xid):
-    qry = """
-        {{
-            lookup(func: eq(xid, "{xid}"))
-                {{uid}}
-        }}
-        """.format(xid=xid)
-    try:
-        result = client.query(qry)
-        data = json.loads(result.json)
-        return data['lookup'][0]['uid']
-    except (KeyError, IndexError):
-        raise XidDoesNotExistError(xid)
-    except Exception:
-        add_xid_to_schema()
-        raise XidDoesNotExistError(xid)
+    def get_uid_for_xid(self, xid):
+        """
+        returns the uid for the xid, creating a new node when the lookup reports that the xid does not exist
+        :param xid: the external id to look up
+        :return: the uid returned by find_uid_for_xid or, failing that, by create_uid_for_xid
+        """
+        try:
+            return self.find_uid_for_xid(xid)
+        except XidDoesNotExistError:
+            return self.create_uid_for_xid(xid)
 
+    def find_uid_for_xid(self, xid):
+        """
+        queries dgraph for the node whose xid predicate equals the supplied xid
+        :param xid: the external id to search for
+        :return: the uid of the first entry in the 'lookup' result
+        :raises XidDoesNotExistError: when the result has no matching entry or when the query fails
+        """
+        # NOTE(review): xid is interpolated into the query text unescaped, so a value containing a double
+        # quote changes the query -- consider query variables if xids can come from untrusted input
+        qry = """
+            {{
+                lookup(func: eq(xid, "{xid}"))
+                    {{uid}}
+            }}
+            """.format(xid=xid)
+        try:
+            result = self.client.query(qry)
+            data = json.loads(result.json)
+            return data['lookup'][0]['uid']
+        except (KeyError, IndexError):
+            # the query ran but the response holds no 'lookup' entry for this xid
+            raise XidDoesNotExistError(xid)
+        except Exception:
+            # any other failure is handled by adding the xid predicate to the schema and reporting the xid as
+            # not found
+            # NOTE(review): this also hides unrelated errors (e.g. connection problems) -- confirm intent
+            self.add_xid_to_schema()
+            raise XidDoesNotExistError(xid)
 
-def create_uid_for_xid(xid):
-    data = {'xid': xid}
-    txn = client.txn()
-    uid = None
-    try:
-        result = txn.mutate(set_obj=data)
-        txn.commit()
-        uid = [result.uids[x] for x in result.uids][0]
-    finally:
-        txn.discard()
-    return uid
+    def create_uid_for_xid(self, xid):
+        """
+        stores a new object carrying the xid in dgraph and returns the uid assigned to it
+        :param xid: the external id to store
+        :return: the first uid listed in the mutation result
+        """
+        data = {'xid': xid}
+        txn = self.client.txn()
+        uid = None
+        try:
+            result = txn.mutate(set_obj=data)
+            txn.commit()
+            # result.uids is iterated as a mapping; the value of its first key is the uid returned
+            uid = [result.uids[x] for x in result.uids][0]
+        finally:
+            # runs on success and on failure -- presumably a no-op after a successful commit, TODO confirm
+            txn.discard()
+        return uid
 
+    def add_xid_to_schema(self):
+        """
+        adds the xid predicate (a string with an exact index) to the dgraph schema
+        :return: None
+        """
+        schema = "xid: string @index(exact) ."
+        op = pydgraph.Operation(schema=schema)
+        self.client.alter(op)
 
-def add_xid_to_schema():
-    """
-    adds the xid schema to dgraph
-    :return: True if added
-    """
-    schema = "xid: string @index(exact) ."
-    op = pydgraph.Operation(schema=schema)
-    client.alter(op)
+    def mutate_add_dataset(self, dataset):
+        """
+        adds the triples in the dataset to dgraph
+        :param dataset: object providing lookup_nodes() and formatter.to_rdf()
+        :return: the result of the mutate call
+        """
+        # lookup_nodes runs before the RDF text is generated -- presumably it resolves node uids, TODO confirm
+        dataset.lookup_nodes()
+        txn = self.client.txn()
+        try:
+            result = txn.mutate(set_nquads=dataset.formatter.to_rdf())
+            txn.commit()
+        finally:
+            # runs on success and on failure; when mutate or commit raises, the exception propagates afterwards
+            txn.discard()
+        return result
 
+    def drop_all(self):
+        """
+        Drops all data from dgraph by sending an alter operation with drop_all=True
+        :return: None
+        """
+        op = pydgraph.Operation(drop_all=True)
+        self.client.alter(op)
 
-def mutate_add_dataset(dataset):
-    """
-    adds the triples in the dataset to dgraph
-    :param dataset:
-    :return:
-    """
-    dataset.lookup_nodes()
-    txn = client.txn()
-    try:
-        result = txn.mutate(set_nquads=dataset.formatter.to_rdf())
-        txn.commit()
-    finally:
-        txn.discard()
-
-
-def drop_all():
-    """
-    Drops all data from dgraph
-    :return:
-    """
-    op = pydgraph.Operation(drop_all=True)
-    client.alter(op)
+# A default Api instance (using DEFAULT_URL) is created when this module is imported; the bound methods of that
+# instance are exposed under module-level names so they can be called as module functions.
+default_api = Api()
+drop_all = default_api.drop_all
+mutate_add_dataset = default_api.mutate_add_dataset
+add_xid_to_schema = default_api.add_xid_to_schema
+create_uid_for_xid = default_api.create_uid_for_xid
+find_uid_for_xid = default_api.find_uid_for_xid
+get_uid_for_xid = default_api.get_uid_for_xid
@@ -1,3 +1,6 @@
+__doc__ = """
+License: GPL 2, W3C, BSD, or MIT
+"""
 import re
 from rdflib.plugins.parsers.ntriples import (r_nodeid,
                                              r_literal,
@@ -6,6 +9,8 @@
                                              unquote)
 import kldgraph.rdfuri as rdfuri
 from kldgraph.tracker import Tracker
+from kldgraph import dataset
+from kldgraph.batcher import BatchProcessor
 
 
 class Store:
@@ -23,27 +28,41 @@ def triple(self, s, p, o):
 
 
 class NtParser(NTriplesParser):
+    """
+    The parser extends rdflib's NTriplesParser
+    """
     count = 0
 
-    def __init__(self, sink=None, use_tracker=False, tracker=None):
+    def __init__(self, sink=None, use_tracker=False, tracker=None, use_batcher=False, batcher=None):
+        """
+        :param sink: object that receives the parsed triples; a new dataset.Dataset is created when None
+        :param use_tracker: when True and no tracker is supplied, a new Tracker is created
+        :param tracker: tracker instance to use; takes precedence over use_tracker
+        :param use_batcher: when True and no batcher is supplied, a BatchProcessor wrapping the sink is created
+        :param batcher: batcher instance to use; takes precedence over use_batcher
+        """
+        # compare with None rather than testing truthiness: an empty sink supplied by the caller may be
+        # falsy (Dataset looks dict-like) and must not be silently replaced by a new one
+        if sink is None:
+            sink = dataset.Dataset()
         super().__init__(sink)
         self.tracker = None
         if use_tracker or tracker:
             if tracker:
                 self.tracker = tracker
             else:
                 self.tracker = Tracker()
+        # always define the attribute: parse() and readline() test self.batcher unconditionally and would
+        # raise AttributeError when no batcher was requested
+        self.batcher = None
+        if use_batcher or batcher:
+            if batcher:
+                self.batcher = batcher
+            else:
+                self.batcher = BatchProcessor(sink)
 
     def parse(self, *args):
+        """
+        starts the tracker when one is set, runs the parent parse and, when a batcher is set, sends whatever
+        is left in the dataset after parsing
+        :param args: passed unchanged to the parent parse
+        :return: None
+        """
         if self.tracker:
             self.tracker.start()
         super().parse(*args)
+        # final send for the items counted since the last full batch
+        if self.batcher:
+            self.batcher.batch_consumer.send()
 
     def readline(self):
+        """
+        reads the next line through the parent readline and counts it on the parser, on the tracker and on
+        the batcher when those are set
+        :return: the value returned by the parent readline
+        """
         val = super().readline()
         self.count += 1
         if self.tracker:
             self.tracker.increment_count()
+        # the batcher triggers a send by itself once its batch size is reached
+        if self.batcher:
+            self.batcher.increment()
         return val
 
     def uriref(self):
@@ -55,7 +74,6 @@ def uriref(self):
 
     def nodeid(self):
         if self.peek('_'):
-            # Fix for https://github.com/RDFLib/rdflib/issues/204
             bnode_id = self.eat(r_nodeid).group(1)
             return rdfuri.Node(bnode_id, rdfuri.NodeType.BLANK)
         return False
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`/venv`
`2`	`2`	`/.idea`
	`3`	`+__pycache__`