From f18e65ab545e7fcea06e42ab9e0e8afdf85b0167 Mon Sep 17 00:00:00 2001
From: Mike Boers <westernx@mikeboers.com>
Date: Mon, 21 Sep 2015 13:07:37 -0700
Subject: [PATCH] Start of project

---
 TODO.md              |  82 +++++++++++++++++++++++++
 sgschema/__init__.py |   0
 sgschema/schema.py   | 141 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 223 insertions(+)
 create mode 100644 TODO.md
 create mode 100644 sgschema/__init__.py
 create mode 100644 sgschema/schema.py

diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..bf4997f
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,82 @@
+
+
+- Every JSON file has the same structure: its top-level keys signify what
+  type of data it holds, e.g. all raw data would be under a "raw_schema_read"
+  field. This allows us to merge a directory of cached data, and lets various
+  tools keep JSON files just about themselves on the SGSCHEMA_PATH, e.g.:
+
+    sgevents.json:
+
+    {
+      'entity_aliases': {
+        'sgevents:EventReceipt': 'CustomNonProjectEntity01'
+      },
+      'field_aliases': {
+        'CustomNonProjectEntity01': {
+          'type': 'sg_type'
+        }
+      },
+      'field_tags': {
+        'PublishEvent': {
+          'sg_type': ['sgcache:include']
+        }
+      }
+    }
+
+    {
+      'PublishEvent': {
+        'aliases': ['sgpublish:Publish', 'Publish'],
+        'fields': {
+          'sg_type': {
+            'aliases': ['sgpublish:type', 'type'],
+            'data_type': 'text',
+            'name': 'Type',
+            'tags': ['sgcache:include'],
+          }
+        }
+      }
+    }
+
+- caches of the raw schema; both public ones and the private one
+- cache of the reduced schema
+
+- role assignments for columns, so that our tools can access roles
+  (via a special syntax) instead of actual column names
+
+  e.g.: PublishEvent.$version -> PublishEvent.sg_version_1
+
+  Can have non-alnum in there, e.g.: PublishEvent.$sgpublish:publish:type
+
+- entity type renames, so that we can use custom entities for
+  whatever we want, e.g.:
+
+  MyType: CustomEntity02
+
+- Arbitrary tags/meta, e.g. if something is used by sgcache or not.
+  
+  EntityType.field: sgcache: include: true
+
+  Could we then have xpath like expressions?
+  e.g.: EntityType.[sgcache.include==true]
+        PublishEvent.[sgpublish.is_core]
+
+  Tags: PublishEvent.$sgpublish:core -> {sg_code,sg_type,...}
+
+- Automatic sg_ prefix detection:
+  Publish.type -> PublishEvent.sg_type
+
+  Have a "doctor" which tells us the potential problems with our schema,
+  such as two columns that are the same minus the prefix
+
+- Force a specific name, to skip the rewriting rules, e.g.: Publish.!type
+  This is more in SGSession (or other consumers)
+
+
+- Are tags/aliases forward or backward declared?
+  
+  schema.PublishEvent.aliases = ['Publish']
+  vs
+  schema.entity_aliases['Publish'] = 'PublishEvent'
+
+  schema.PublishEvent.sg_type.aliases = ['type']
+  schema.field_aliases['PublishEvent']['type'] = 'sg_type'
diff --git a/sgschema/__init__.py b/sgschema/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sgschema/schema.py b/sgschema/schema.py
new file mode 100644
index 0000000..7a0c34d
--- /dev/null
+++ b/sgschema/schema.py
@@ -0,0 +1,141 @@
+import json
+import os
+import re
+
+import requests
+import yaml
+
+
+
+class Schema(object):
+    """Reduced view of a Shotgun schema, cached on disk as JSON (Python 2 code)."""
+
+    def __init__(self):
+        # Raw payloads, exactly as fetched by read() or reloaded by load().
+        self._raw_fields = None
+        self._raw_entities = None
+        self._raw_private = None
+        # Reduced data built by _reduce_raw(), keyed by entity type name.
+        self.entities = {}
+        self.fields = {}
+        self.identifier_columns = {}
+        self.entity_aliases = {}
+        self.field_aliases = {}
+        self.field_tags = {}
+
+    def read(self, sg):
+        """Fetch the schemas via ``sg`` and reduce them; ValueError if unparseable."""
+        # SG.schema_field_read() is the same data per-entity as SG.schema_read().
+        # SG.schema_entity_read() contains global name and visibility of each
+        # entity type, but the visibility is likely to just be True for everything.
+        self._raw_fields = sg.schema_read()
+        self._raw_entities = sg.schema_entity_read()
+
+        # We also want the private schema which drives the website.
+        # See <http://mikeboers.com/blog/2015/07/21/a-complete-shotgun-schema>.
+        session = requests.Session()
+        session.cookies['_session_id'] = sg.get_session_token()
+        js = session.get(sg.base_url + '/page/schema').text
+        # An empty response must reach the ValueError below, not an IndexError.
+        encoded = (js.splitlines() or [''])[0]
+        m = re.match(r'^SG\.schema = new SG\.Schema\((.+)\);\s*$', encoded)
+        if not m:
+            raise ValueError('schema does not appear to be at %s/page/schema' % sg.base_url)
+        self._raw_private = json.loads(m.group(1))
+        self._reduce_raw()
+
+    def _reduce_raw(self):
+        """Rebuild the reduced data from the raw payloads (None counts as empty)."""
+        for type_name, raw_entity in (self._raw_entities or {}).iteritems():
+            self.entities[type_name] = {'name': raw_entity['name']['value']}
+
+        private_fields = (self._raw_private or {}).get('entity_fields') or {}
+        for type_name, raw_fields in (self._raw_fields or {}).iteritems():
+            self.fields[type_name] = fields = {}
+            # The private schema may lack an entity type, or a field of it.
+            private_type = private_fields.get(type_name) or {}
+            for field_name, raw_field in raw_fields.iteritems():
+                fields[field_name] = field = {}
+                for key in 'name', 'data_type':
+                    field[key] = raw_field[key]['value']
+                raw_private = private_type.get(field_name) or {}
+                if raw_private.get('identifier_column'):
+                    field['identifier_column'] = True
+                    self.identifier_columns[type_name] = field_name
+                if field['data_type'] in ('entity', 'multi_entity'):
+                    types_ = raw_private.get('allowed_entity_types') or []
+                    field['allowed_entity_types'] = list(types_)
+
+    def _build_associations(self):
+        """Rebuild identifier_columns from the already-reduced ``self.fields``."""
+        self.identifier_columns = {}
+        for type_name, fields in self.fields.iteritems():
+            for field_name, field in fields.iteritems():
+                if field.get('identifier_column'):
+                    self.identifier_columns[type_name] = field_name
+
+    def _dump_prep(self, value):
+        """Return a copy of ``value`` with unicode strings encoded as UTF-8."""
+        if isinstance(value, unicode):
+            return value.encode("utf8")
+        elif isinstance(value, dict):
+            return {self._dump_prep(k): self._dump_prep(v) for k, v in value.iteritems()}
+        elif isinstance(value, (tuple, list)):
+            return [self._dump_prep(x) for x in value]
+        return value
+
+    def dump(self, dir_path):
+        """Write the non-empty raw and reduced data as JSON files in ``dir_path``."""
+        for name in 'fields', 'entities', 'private':
+            value = getattr(self, '_raw_' + name)
+            if value:
+                with open(os.path.join(dir_path, 'raw_%s.json' % name), 'w') as fh:
+                    fh.write(json.dumps(value, indent=4, sort_keys=True))
+        # Write everything load() reads back, so entities survive a round trip.
+        for name in 'fields', 'entities':
+            value = getattr(self, name)
+            if value:
+                with open(os.path.join(dir_path, name + '.json'), 'w') as fh:
+                    fh.write(json.dumps(self._dump_prep(value), indent=4, sort_keys=True))
+
+    def load(self, dir_path, raw=False):
+        """Load from ``dir_path``: reduced files first, raw if ``raw`` or none found."""
+        if not raw:
+            for name in ('fields', 'entities'):
+                path = os.path.join(dir_path, name + '.json')
+                if os.path.exists(path):
+                    with open(path) as fh:
+                        setattr(self, name, json.load(fh))
+            if self.fields:
+                self._build_associations()
+
+        if raw or not self.fields:
+            for name in 'fields', 'entities', 'private':
+                path = os.path.join(dir_path, 'raw_%s.json' % name)
+                if os.path.exists(path):
+                    with open(path) as fh:
+                        setattr(self, '_raw_' + name, json.load(fh))
+            self._reduce_raw()
+
+
+
+if __name__ == '__main__':
+    # Development harness: rebuild the 'sandbox' cache and time a cached load.
+    import time
+    from shotgun_api3_registry import connect
+
+    READ_FROM_SERVER = False  # flip to fetch a live schema instead of the raw cache
+    connection = connect(use_cache=False)
+    cache = Schema()
+
+    if not READ_FROM_SERVER:
+        cache.load('sandbox', raw=True)
+    else:
+        cache.read(connection)
+    cache.dump('sandbox')
+
+    # Report how long loading the reduced cache takes, in milliseconds.
+    started = time.time()
+    cache.load('sandbox')
+    print(1000 * (time.time() - started))
+