diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..bf4997f
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,82 @@

- every JSON file has the same structure: the top-level keys signify what
  type of data it holds, e.g. all raw data would be under a "raw_schema_read"
  key. This lets us merge a directory of cached data, and lets various tools
  ship JSON files describing only themselves on the SGSCHEMA_PATH, e.g.:

    sgevents.json:

        {
            "entity_aliases": {
                "sgevents:EventReceipt": "CustomNonProjectEntity01"
            },
            "field_aliases": {
                "CustomNonProjectEntity01": {
                    "type": "sg_type"
                }
            },
            "field_tags": {
                "PublishEvent": {
                    "sg_type": ["sgcache:include"]
                }
            }
        }

  Merged into the per-entity reduced schema, the same data might look like:

        {
            "PublishEvent": {
                "aliases": ["sgpublish:Publish", "Publish"],
                "fields": {
                    "sg_type": {
                        "aliases": ["sgpublish:type", "type"],
                        "data_type": "text",
                        "name": "Type",
                        "tags": ["sgcache:include"]
                    }
                }
            }
        }

- caches of the raw schema; both the public ones and the private one

- a cache of the reduced schema

- role assignments for columns, so that our tools can refer to roles (via a
  special syntax) instead of actual column names (see the resolution sketch
  at the end of this file), e.g.:

      PublishEvent.$version -> PublishEvent.sg_version_1

  Role names can contain non-alphanumeric characters, e.g.:

      PublishEvent.$sgpublish:publish:type

- entity type renames, so that we can use custom entities for whatever we
  want, e.g.:

      MyType: CustomEntity02

- arbitrary tags/meta, e.g. whether or not something is used by sgcache:

      EntityType.field: sgcache: include: true

  Could we then have XPath-like expressions? e.g.:

      EntityType.[sgcache.include==true]
      PublishEvent.[sgpublish.is_core]

  Tags could also expand to sets of fields:

      PublishEvent.$sgpublish:core -> {sg_code,sg_type,...}

- automatic sg_ prefix detection:

      Publish.type -> PublishEvent.sg_type

  Have a "doctor" which reports potential problems with our schema, such as
  two columns whose names differ only by the prefix.

- force a specific name, skipping the rewriting rules, e.g.: Publish.!type
  This belongs more in SGSession (or other consumers).

- Are tags/aliases forward- or backward-declared?

      schema.PublishEvent.aliases = ['Publish']
      vs
      schema.entity_aliases['Publish'] = 'PublishEvent'

      schema.PublishEvent.sg_type.aliases = ['type']
      vs
      schema.field_aliases['PublishEvent']['type'] = 'sg_type'
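
- A rough sketch of how one resolution pass might thread the role, alias,
  prefix, and force rules together (every name here is illustrative, and
  field_roles is a hypothetical mapping that does not exist yet):

      def resolve_field(schema, entity_type, spec):
          # 'Publish.!type': a leading '!' skips every rewriting rule.
          if spec.startswith('!'):
              return spec[1:]
          # 'PublishEvent.$version': a leading '$' looks up a role
          # assignment in a hypothetical field_roles mapping.
          if spec.startswith('$'):
              return schema.field_roles[entity_type][spec[1:]]
          # Declared aliases, e.g. 'sgpublish:type' -> 'sg_type'.
          aliases = schema.field_aliases.get(entity_type, {})
          if spec in aliases:
              return aliases[spec]
          # Automatic sg_ prefix detection, e.g. 'type' -> 'sg_type'.
          fields = schema.fields.get(entity_type, {})
          if spec not in fields and 'sg_' + spec in fields:
              return 'sg_' + spec
          return spec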
diff --git a/sgschema/__init__.py b/sgschema/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sgschema/schema.py b/sgschema/schema.py
new file mode 100644
index 0000000..7a0c34d
--- /dev/null
+++ b/sgschema/schema.py
@@ -0,0 +1,141 @@
import json
import os
import re

import requests
import yaml


class Schema(object):

    def __init__(self):

        self._raw_fields = None
        self._raw_entities = None
        self._raw_private = None

        self.entities = {}
        self.fields = {}
        self.entity_aliases = {}
        self.field_aliases = {}
        self.field_tags = {}
        self.identifier_columns = {}

    def read(self, sg):

        # SG.schema_field_read() returns the same data per-entity as
        # SG.schema_read(). SG.schema_entity_read() contains the global name
        # and visibility of each entity type, but the visibility is likely
        # to just be True for everything.
        self._raw_fields = sg.schema_read()
        self._raw_entities = sg.schema_entity_read()

        # We also want the private schema which drives the website.
        # See .
        session = requests.Session()
        session.cookies['_session_id'] = sg.get_session_token()

        js = session.get(sg.base_url + '/page/schema').text
        encoded = js.splitlines()[0]
        m = re.match(r'^SG\.schema = new SG\.Schema\((.+)\);\s*$', encoded)
        if not m:
            raise ValueError('schema does not appear to be at %s/page/schema' % sg.base_url)

        self._raw_private = json.loads(m.group(1))

        self._reduce_raw()

    def _reduce_raw(self):

        for type_name, raw_entity in self._raw_entities.iteritems():
            self.entities[type_name] = entity = {}
            for key in ('name', ):
                entity[key] = raw_entity[key]['value']

        for type_name, raw_fields in self._raw_fields.iteritems():

            self.fields[type_name] = fields = {}

            for field_name, raw_field in raw_fields.iteritems():

                fields[field_name] = field = {}

                for key in ('name', 'data_type'):
                    field[key] = raw_field[key]['value']

                raw_private = self._raw_private['entity_fields'].get(type_name, {}).get(field_name, {})

                if raw_private.get('identifier_column'):
                    field['identifier_column'] = True
                    self.identifier_columns[type_name] = field_name

                if field['data_type'] in ('entity', 'multi_entity'):
                    types_ = raw_private.get('allowed_entity_types') or []
                    field['allowed_entity_types'] = types_[:]

    def _build_associations(self):
        # TODO: derive entity/field aliases and tag lookups from the
        # reduced schema once that data is part of the dump.
        pass

    def _dump_prep(self, value):
        # json.dumps on Python 2 is happier with utf8 byte strings.
        if isinstance(value, unicode):
            return value.encode('utf8')
        elif isinstance(value, dict):
            return {self._dump_prep(k): self._dump_prep(v) for k, v in value.iteritems()}
        elif isinstance(value, (tuple, list)):
            return [self._dump_prep(x) for x in value]
        else:
            return value

    def dump(self, dir_path):
        for name in ('fields', 'entities', 'private'):
            value = getattr(self, '_raw_' + name)
            if value:
                with open(os.path.join(dir_path, 'raw_%s.json' % name), 'w') as fh:
                    fh.write(json.dumps(value, indent=4, sort_keys=True))
        for name in ('fields', 'entities'):
            value = getattr(self, name)
            if value:
                with open(os.path.join(dir_path, name + '.json'), 'w') as fh:
                    fh.write(json.dumps(self._dump_prep(value), indent=4, sort_keys=True))

    def load(self, dir_path, raw=False):

        # Prefer the reduced schema if it has been dumped...
        if not raw:
            for name in ('fields', 'entities'):
                path = os.path.join(dir_path, name + '.json')
                if os.path.exists(path):
                    with open(path) as fh:
                        setattr(self, name, json.load(fh))
            if self.fields:
                self._build_associations()

        # ... otherwise fall back onto the raw caches and reduce them again.
        if raw or not self.fields:
            for name in ('fields', 'entities', 'private'):
                path = os.path.join(dir_path, 'raw_%s.json' % name)
                if os.path.exists(path):
                    with open(path) as fh:
                        setattr(self, '_raw_' + name, json.load(fh))
            self._reduce_raw()


if __name__ == '__main__':

    import time
    from shotgun_api3_registry import connect

    sg = connect(use_cache=False)

    schema = Schema()

    # Flip this to pull the schema from the live server instead of
    # the raw caches in the sandbox.
    if False:
        schema.read(sg)
    else:
        schema.load('sandbox', raw=True)

    schema.dump('sandbox')

    # Time how long loading the reduced schema takes.
    t = time.time()
    schema.load('sandbox')
    print 1000 * (time.time() - t)
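
    # A quick sanity check (a sketch, not part of the module): a second
    # Schema loaded from the files just dumped above should carry the same
    # reduced field data. Assumes 'sandbox' now contains fields.json.
    schema2 = Schema()
    schema2.load('sandbox')
    assert schema2.fields == schema.fields, 'dump/load round-trip changed fields'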