Commit f18e65a: Start of project
mikeboers committed Sep 21, 2015 (0 parents)
Showing 3 changed files with 223 additions and 0 deletions.
82 changes: 82 additions & 0 deletions TODO.md
@@ -0,0 +1,82 @@


- every JSON file has the same structure: its top-level keys signify what
  type of data it holds, e.g. all raw data would live under a "raw_schema_read" key.
  This lets us merge a directory of cached data, and lets various tools ship
  JSON files describing only themselves on the SGSCHEMA_PATH (see the merge
  sketch after the examples below), e.g.:

sgevents.json:

    {
        'entity_aliases': {
            'sgevents:EventReceipt': 'CustomNonProjectEntity01'
        },
        'field_aliases': {
            'CustomNonProjectEntity01': {
                'type': 'sg_type'
            }
        },
        'field_tags': {
            'PublishEvent': {
                'sg_type': ['sgcache:include']
            }
        }
    }

    {
        'PublishEvent': {
            'aliases': ['sgpublish:Publish', 'Publish'],
            'fields': {
                'sg_type': {
                    'aliases': ['sgpublish:type', 'type'],
                    'data_type': 'text',
                    'name': 'Type',
                    'tags': ['sgcache:include'],
                }
            }
        }
    }
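
  A minimal sketch of the merge idea, assuming the SGSCHEMA_PATH environment
  variable holds a list of directories of such JSON files; the helper name and
  the dict-of-dicts merge strategy are illustrative, not decided:

      import json
      import os

      def merge_schema_caches(path_var='SGSCHEMA_PATH'):
          # Merge every *.json file found on the path into a single dict keyed
          # by the shared top-level sections ('entity_aliases', 'field_tags', ...).
          merged = {}
          for dir_path in os.environ.get(path_var, '').split(os.pathsep):
              if not dir_path:
                  continue
              for file_name in sorted(os.listdir(dir_path)):
                  if not file_name.endswith('.json'):
                      continue
                  with open(os.path.join(dir_path, file_name)) as fh:
                      data = json.load(fh)
                  for section, values in data.items():
                      # Later files extend (and may override) earlier ones.
                      merged.setdefault(section, {}).update(values)
          return merged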

- caches of the raw schema; both public ones and the private one
- cache of the reduced schema

- role assignments for columns, so that our tools can refer to roles
  (via a special syntax) instead of actual column names; see the resolution
  sketch after this item.

e.g.: PublishEvent.$version -> PublishEvent.sg_version_1

Can have non-alnum in there, e.g.: PublishEvent.$sgpublish:publish:type
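
  A rough sketch of the role resolution, assuming a hypothetical `field_roles`
  mapping shaped like `field_aliases` above (the mapping and helper names are
  illustrative, not decided):

      def resolve_field(schema, entity_type, spec):
          # '$version' names a role; anything else is taken literally.
          if spec.startswith('$'):
              return schema.field_roles[entity_type][spec[1:]]
          return spec

      # resolve_field(schema, 'PublishEvent', '$version') -> 'sg_version_1'
      # Role names may contain non-alphanumeric characters,
      # e.g. '$sgpublish:publish:type'.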

- entity type renames, so that we can use custom entities for
whatever we want, e.g.:

MyType: CustomEntity02

- Arbitrary tags/meta, e.g. whether something is used by sgcache or not.

  EntityType.field: sgcache: include: true

  Could we then have XPath-like expressions?
  e.g.: EntityType.[sgcache.include==true]
        PublishEvent.[sgpublish.is_core]

  Tags: PublishEvent.$sgpublish:core -> {sg_code,sg_type,...}
  (see the tag-lookup sketch below)
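
  A sketch of the tag lookup, assuming tags end up stored per field as in the
  merged example above (the helper name is hypothetical):

      def fields_with_tag(schema, entity_type, tag):
          # Every field on the entity whose 'tags' list contains the tag,
          # e.g. fields_with_tag(schema, 'PublishEvent', 'sgcache:include').
          fields = schema.fields.get(entity_type, {})
          return set(
              name for name, field in fields.items()
              if tag in field.get('tags', ())
          )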

- Automatic sg_ prefix detection:
Publish.type -> PublishEvent.sg_type

Have a "doctor" which tells us the potential problems with our schema,
such as two columns that are the same minus the prefix

- Force a specific name, to skip the rewriting rules, e.g.: Publish.!type
  This belongs more in SGSession (or other consumers); a combined resolution
  sketch follows.
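
  A sketch of how the rewriting rules might compose when resolving one field
  name; the resolution order (literal '!' escape, exact match, then the sg_
  prefix) is an assumption, not a decision:

      def resolve_field_name(fields, requested):
          # 'fields' is the dict of known field names for one entity type.
          if requested.startswith('!'):
              return requested[1:]      # Publish.!type -> literal 'type'
          if requested in fields:
              return requested          # exact match wins
          prefixed = 'sg_' + requested
          if prefixed in fields:
              return prefixed           # Publish.type -> sg_type
          raise KeyError('cannot resolve field %r' % requested)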


- Are tags/aliases forward or backward declared? (an inversion sketch follows
  the examples)

schema.PublishEvent.aliases = ['Publish']
vs
schema.entity_aliases['Publish'] = 'PublishEvent'

schema.PublishEvent.sg_type.aliases = ['type']
schema.field_aliases['PublishEvent']['type'] = 'sg_type'
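
  Whichever direction is declared, the other can be derived once at load time.
  A sketch of inverting forward-declared entity aliases into the backward
  lookup table (names follow the examples above):

      def invert_entity_aliases(entities):
          # entities: {'PublishEvent': {'aliases': ['Publish', ...]}, ...}
          entity_aliases = {}
          for type_name, entity in entities.items():
              for alias in entity.get('aliases', ()):
                  entity_aliases[alias] = type_name
          return entity_aliases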
Empty file added sgschema/__init__.py
Empty file.
141 changes: 141 additions & 0 deletions sgschema/schema.py
@@ -0,0 +1,141 @@
import json
import os
import re

import requests
import yaml



class Schema(object):

    def __init__(self):

        # Raw payloads as returned by the API / website; kept for dumping.
        self._raw_fields = None
        self._raw_entities = None
        self._raw_private = None

        # Reduced schema and lookup tables.
        self.entities = {}
        self.fields = {}
        self.entity_aliases = {}
        self.field_aliases = {}
        self.field_tags = {}
        self.identifier_columns = {}


    def read(self, sg):

        # SG.schema_field_read() is the same data per-entity as SG.schema_read().
        # SG.schema_entity_read() contains global name and visibility of each
        # entity type, but the visibility is likely to just be True for everything.
        self._raw_fields = sg.schema_read()
        self._raw_entities = sg.schema_entity_read()

        # We also want the private schema which drives the website.
        # See <http://mikeboers.com/blog/2015/07/21/a-complete-shotgun-schema>.

        session = requests.Session()
        session.cookies['_session_id'] = sg.get_session_token()

        js = session.get(sg.base_url + '/page/schema').text
        encoded = js.splitlines()[0]
        m = re.match(r'^SG\.schema = new SG\.Schema\((.+)\);\s*$', encoded)
        if not m:
            raise ValueError('schema does not appear to be at %s/page/schema' % sg.base_url)

        self._raw_private = json.loads(m.group(1))

        self._reduce_raw()

    def _reduce_raw(self):

        for type_name, raw_entity in self._raw_entities.iteritems():

            self.entities[type_name] = entity = {}
            for name in ('name', ):
                entity[name] = raw_entity[name]['value']

        for type_name, raw_fields in self._raw_fields.iteritems():

            self.fields[type_name] = fields = {}

            for field_name, raw_field in raw_fields.iteritems():

                fields[field_name] = field = {}

                for key in 'name', 'data_type':
                    field[key] = raw_field[key]['value']

                # The private schema may not know about every field.
                raw_private = self._raw_private['entity_fields'][type_name].get(field_name, {})

                if raw_private.get('identifier_column'):
                    field['identifier_column'] = True
                    self.identifier_columns[type_name] = field_name

                if field['data_type'] in ('entity', 'multi_entity'):
                    types_ = raw_private.get('allowed_entity_types') or []
                    field['allowed_entity_types'] = types_[:]

    def _dump_prep(self, value):
        # Recursively encode unicode to UTF-8 byte strings before dumping.
        if isinstance(value, unicode):
            return value.encode("utf8")
        elif isinstance(value, dict):
            return {self._dump_prep(k): self._dump_prep(v) for k, v in value.iteritems()}
        elif isinstance(value, (tuple, list)):
            return [self._dump_prep(x) for x in value]
        else:
            return value

    def dump(self, dir_path):
        for name in 'fields', 'entities', 'private':
            value = getattr(self, '_raw_' + name)
            if value:
                with open(os.path.join(dir_path, 'raw_%s.json' % name), 'w') as fh:
                    fh.write(json.dumps(value, indent=4, sort_keys=True))
        for name in ('fields', ):
            value = getattr(self, name)
            if value:
                with open(os.path.join(dir_path, name + '.json'), 'w') as fh:
                    fh.write(json.dumps(self._dump_prep(value), indent=4, sort_keys=True))

    def load(self, dir_path, raw=False):

        # Prefer the reduced schema cache; fall back to the raw dumps.
        if not raw:
            for name in ('fields', 'entities'):
                path = os.path.join(dir_path, name + '.json')
                if os.path.exists(path):
                    with open(path) as fh:
                        setattr(self, name, json.load(fh))
            if self.fields:
                self._build_associations()

        if raw or not self.fields:
            for name in 'fields', 'entities', 'private':
                path = os.path.join(dir_path, 'raw_%s.json' % name)
                if os.path.exists(path):
                    with open(path) as fh:
                        setattr(self, '_raw_' + name, json.load(fh))
            self._reduce_raw()



if __name__ == '__main__':

    import time
    from shotgun_api3_registry import connect

    sg = connect(use_cache=False)

    schema = Schema()

    if False:
        schema.read(sg)
    else:
        schema.load('sandbox', raw=True)

    schema.dump('sandbox')

    t = time.time()
    schema.load('sandbox')
    print 1000 * (time.time() - t)
