diff --git a/.vscode/settings.json b/.vscode/settings.json index 74ca387..53b7c4e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,8 @@ ".vscode/": true, "**/*.log": true, "**/db.sql": true, - "**/.cq/": true + "**/.cq/": true, + "db.sql-journal": true }, "editor.formatOnSave": true, "editor.minimap.enabled": false diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b9bc9f..120b66e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 1.2.0 (2023-02-28) + +### Features + +- Managed Metadata support + ## 1.1.0 (2023-02-26) ### Features diff --git a/README.md b/README.md index a91fd12..a5654ea 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ ## Roadmap - [x] Lists and Document Libraries data -- [ ] [Managed Metadata data](https://github.com/koltyakov/cq-source-sharepoint/issues/12) +- [x] [Managed Metadata terms](https://github.com/koltyakov/cq-source-sharepoint/issues/12) - [ ] [User Profile Service data](https://github.com/koltyakov/cq-source-sharepoint/issues/13) - [ ] [Search queries data](https://github.com/koltyakov/cq-source-sharepoint/issues/14) - [ ] Content types based rollup @@ -36,7 +36,7 @@ spec: name: "sharepoint" registry: "github" path: "koltyakov/sharepoint" - version: "v1.1.0" # provide the latest stable version + version: "v1.2.0" # provide the latest stable version destinations: ["postgresql"] # provide the list of used destinations spec: # Spec is mandatory @@ -68,7 +68,7 @@ SharePoint On-Premise auth is also supported, based on your farm configuration y ### Entities configuration -So far, the plugin supports lists and document libraries data fetching. Base on feedback and use cases, we have a strategy for extending the plugin to support other SharePoint API entities, e.g. Managed Metadata, UPS, etc. +So far, the plugin supports lists, document libraries and managed metadata fetching. 
Based on feedback and use cases, we have a strategy for extending the plugin to support other SharePoint API entities. A single source `yml` configuration assumes fetching data from a single SharePoint site. If you need to fetch data from multiple sites, you can create multiple source configurations. @@ -76,7 +76,7 @@ A single source `yml` configuration assumes fetching data from a single SharePoi # sharepoint.yml # ... spec: - # A map of URIs to the list configuration + # A map of list URIs to their configurations # If no lists are provided, nothing will be fetched lists: # List or Document library URI - a relative path without a site URL @@ -105,6 +105,14 @@ spec: Lists/AnotherList: select: - Title + # A map of MMD term set IDs (GUIDs) to their configurations + # If no term sets are provided, no terms will be fetched + mmd: + # Term set ID + 8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f: + # Optional, an alias for the table name + # the name of the alias is prefixed with `mmd_` + alias: "department" ``` #### User Information List @@ -151,6 +159,19 @@ spec: alias: "document" ``` +### Managed Metadata + +To configure managed metadata fetching, you need to provide a term set ID (GUID) and an optional alias for the table name. + +```yaml +# sharepoint.yml +# ... 
+spec: + mmd: + 8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f: + alias: "department" +``` + ## Get started ### Install CloudQuery @@ -181,7 +202,7 @@ spec: name: "sharepoint" registry: "github" path: "koltyakov/sharepoint" - version: "v1.1.0" # https://github.com/koltyakov/cq-source-sharepoint/releases + version: "v1.2.0" # https://github.com/koltyakov/cq-source-sharepoint/releases destinations: ["sqlite"] spec: auth: diff --git a/client/client.go b/client/client.go index 8b23e94..dc218e6 100644 --- a/client/client.go +++ b/client/client.go @@ -9,6 +9,7 @@ import ( "github.com/cloudquery/plugin-sdk/specs" "github.com/koltyakov/cq-source-sharepoint/resources/auth" "github.com/koltyakov/cq-source-sharepoint/resources/lists" + "github.com/koltyakov/cq-source-sharepoint/resources/mmd" "github.com/rs/zerolog" ) @@ -16,6 +17,7 @@ type Client struct { Tables schema.Tables lists *lists.Lists + mmd *mmd.MMD source specs.Source opts source.Options @@ -39,17 +41,32 @@ func NewClient(_ context.Context, logger zerolog.Logger, src specs.Source, opts // sp.Conf(&api.RequestConfig{Context: ctx}) // for some reason gets context cancelled immediately client := &Client{ - lists: lists.NewLists(sp, logger), + lists: lists.NewLists(sp, logger), + mmd: mmd.NewMMD(sp, logger), + source: src, opts: opts, } client.Tables = make(schema.Tables, 0, len(spec.Lists)) + // Managed metadata tables prepare + for termSetID, mmdSpec := range spec.MMD { + table, err := client.mmd.GetDestTable(termSetID, mmdSpec) + if err != nil { + return nil, fmt.Errorf("failed to get table from term set \"%s\": %w", termSetID, err) + } + if table != nil { + logger.Debug().Str("table", table.Name).Str("termset", termSetID).Str("columns", table.Columns.String()).Msg("columns for table") + client.Tables = append(client.Tables, table) + } + } + + // Lists tables prepare for listURI, listSpec := range spec.Lists { table, err := client.lists.GetDestTable(listURI, listSpec) if err != nil { - return nil, fmt.Errorf("failed to get 
table from list: %w", err) + return nil, fmt.Errorf("failed to get table from list \"%s\": %w", listURI, err) } if table != nil { logger.Debug().Str("table", table.Name).Str("list", listURI).Str("columns", table.Columns.String()).Msg("columns for table") diff --git a/client/spec.go b/client/spec.go index 53b1731..0e562c2 100644 --- a/client/spec.go +++ b/client/spec.go @@ -7,6 +7,7 @@ import ( "github.com/cloudquery/plugin-sdk/specs" "github.com/koltyakov/cq-source-sharepoint/resources/auth" "github.com/koltyakov/cq-source-sharepoint/resources/lists" + "github.com/koltyakov/cq-source-sharepoint/resources/mmd" ) // Spec is the configuration for a SharePoint source @@ -17,6 +18,9 @@ type Spec struct { // A map of URIs to the list configuration // If no lists are provided, nothing will be fetched Lists map[string]lists.Spec `json:"lists"` + + // A map of TermSets GUIDs to the MMD configuration + MMD map[string]mmd.Spec `json:"mmd"` } // SetDefaults sets default values for top level spec @@ -52,6 +56,18 @@ func (s *Spec) Validate() error { aliases[alias] = true } + // All term sets should have unique aliases + for terSetID, mmdSpec := range s.MMD { + alias := strings.ToLower("mmd_" + mmdSpec.Alias) + if mmdSpec.Alias == "" { + alias = strings.ToLower("mmd_" + strings.ReplaceAll(terSetID, "-", "")) + } + if _, ok := aliases[alias]; ok { + return fmt.Errorf("duplicate alias \"%s\" for term set \"%s\" configuration", alias, terSetID) + } + aliases[alias] = true + } + return nil } diff --git a/client/sync.go b/client/sync.go index de16e84..8de5d9b 100644 --- a/client/sync.go +++ b/client/sync.go @@ -16,12 +16,23 @@ func (c *Client) Sync(ctx context.Context, metrics *source.Metrics, res chan<- * } } - // ToDo: Separate tables for different types of resources - for _, table := range c.Tables { + // Lists sync + for tableName := range c.lists.TablesMap { + table := c.Tables.Get(tableName) m := metrics.TableClient[table.Name][c.ID()] if err := c.lists.Sync(ctx, m, res, table); 
err != nil { return fmt.Errorf("syncing table %s: %w", table.Name, err) } } + + // MMD (Terms from TermSets) sync + for tableName := range c.mmd.TablesMap { + table := c.Tables.Get(tableName) + m := metrics.TableClient[table.Name][c.ID()] + if err := c.mmd.Sync(ctx, m, res, table); err != nil { + return fmt.Errorf("syncing table %s: %w", table.Name, err) + } + } + return nil } diff --git a/debug/.gitignore b/debug/.gitignore index 6300ab4..270cbe9 100644 --- a/debug/.gitignore +++ b/debug/.gitignore @@ -2,4 +2,5 @@ db/ *.log .cq/ db.sql -.env \ No newline at end of file +.env +db.sql-journal \ No newline at end of file diff --git a/debug/sharepoint.yml b/debug/sharepoint.yml index 9fb355b..e25e355 100644 --- a/debug/sharepoint.yml +++ b/debug/sharepoint.yml @@ -69,3 +69,6 @@ spec: - OrderDate - Total alias: "order" + mmd: + 8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f: + alias: "department" diff --git a/resources/lists/lists.go b/resources/lists/lists.go index e63b34d..b452a13 100644 --- a/resources/lists/lists.go +++ b/resources/lists/lists.go @@ -15,7 +15,7 @@ type Lists struct { sp *api.SP logger zerolog.Logger - tablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model) + TablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model) } type Model struct { @@ -28,7 +28,7 @@ func NewLists(sp *api.SP, logger zerolog.Logger) *Lists { return &Lists{ sp: sp, logger: logger, - tablesMap: map[string]Model{}, + TablesMap: map[string]Model{}, } } @@ -107,7 +107,7 @@ func (l *Lists) GetDestTable(listURI string, spec Spec) (*schema.Table, error) { model.FieldsMap[col.Name] = prop } - l.tablesMap[table.Name] = *model + l.TablesMap[table.Name] = *model return table, nil } diff --git a/resources/lists/sync.go b/resources/lists/sync.go index 57be255..2d07189 100644 --- a/resources/lists/sync.go +++ b/resources/lists/sync.go @@ -14,7 +14,7 @@ import ( ) func (l *Lists) Sync(ctx context.Context, metrics 
*source.TableClientMetrics, res chan<- *schema.Resource, table *schema.Table) error { - opts := l.tablesMap[table.Name] + opts := l.TablesMap[table.Name] logger := l.logger.With().Str("table", table.Name).Logger() logger.Debug().Strs("cols", opts.Spec.Select).Msg("selecting columns from list") diff --git a/resources/mmd/mmd.go b/resources/mmd/mmd.go new file mode 100644 index 0000000..ee9d251 --- /dev/null +++ b/resources/mmd/mmd.go @@ -0,0 +1,91 @@ +package mmd + +import ( + "strings" + + "github.com/cloudquery/plugin-sdk/schema" + "github.com/koltyakov/cq-source-sharepoint/internal/util" + "github.com/koltyakov/gosip/api" + "github.com/rs/zerolog" +) + +type MMD struct { + sp *api.SP + logger zerolog.Logger + + TablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model) +} + +type Model struct { + ID string + Spec Spec + FieldsMap map[string]string // cq column name to column metadata +} + +func NewMMD(sp *api.SP, logger zerolog.Logger) *MMD { + return &MMD{ + sp: sp, + logger: logger, + TablesMap: map[string]Model{}, + } +} + +func (m *MMD) GetDestTable(terSetID string, spec Spec) (*schema.Table, error) { + tableName := util.NormalizeEntityName(strings.ReplaceAll(terSetID, "-", "")) // ToDo: ${TertGoup}_${TermSetName} + if spec.Alias != "" { + tableName = util.NormalizeEntityName(spec.Alias) + } + + table := &schema.Table{ + Name: "sharepoint_mmd_" + tableName, + Description: "", // TermSetName + Columns: []schema.Column{ + {Name: "id", Type: schema.TypeUUID, CreationOptions: schema.ColumnCreationOptions{PrimaryKey: true}}, + {Name: "name", Type: schema.TypeString}, + {Name: "description", Type: schema.TypeString}, + {Name: "tagging", Type: schema.TypeBool}, + {Name: "deprecated", Type: schema.TypeBool}, + {Name: "pinned", Type: schema.TypeBool}, + {Name: "reused", Type: schema.TypeBool}, + {Name: "root", Type: schema.TypeBool}, + {Name: "source", Type: schema.TypeBool}, + {Name: "path", Type: schema.TypeStringArray}, + {Name: 
"children", Type: schema.TypeInt}, + {Name: "merged", Type: schema.TypeUUIDArray}, + {Name: "shared_props", Type: schema.TypeJSON}, + {Name: "local_props", Type: schema.TypeJSON}, + {Name: "custom_sort", Type: schema.TypeUUIDArray}, + {Name: "owner", Type: schema.TypeString}, + {Name: "created", Type: schema.TypeTimestamp}, + {Name: "modified", Type: schema.TypeTimestamp}, + }, + } + + // ToDo: Remove this reverse mapping + m.TablesMap[table.Name] = Model{ + ID: terSetID, + Spec: spec, + FieldsMap: map[string]string{ + "id": "Id", + "name": "Name", + "description": "Description", + "tagging": "IsAvailableForTagging", + "deprecated": "IsDeprecated", + "pinned": "IsPinned", + "reused": "IsReused", + "root": "IsRoot", + "source": "IsSourceTerm", + "path": "PathOfTerm", + "children": "TermsCount", + "merged": "MergedTermIds", + "shared_props": "CustomProperties", + "local_props": "LocalCustomProperties", + "custom_sort": "CustomSortOrder", + "owner": "Owner", + "created": "CreatedDate", + "modified": "LastModifiedDate", + }, + } + + return table, nil +} diff --git a/resources/mmd/spec.go b/resources/mmd/spec.go new file mode 100644 index 0000000..b1d3362 --- /dev/null +++ b/resources/mmd/spec.go @@ -0,0 +1,11 @@ +package mmd + +// Spec is the configuration for MMD term set source +type Spec struct { + // Optional, an alias for the table name + // Don't map different term sets to the same table - such scenario is not supported + Alias string `json:"alias"` +} + +// SetDefault sets default values for term set spec +// func (s *Spec) SetDefault() {} diff --git a/resources/mmd/sync.go b/resources/mmd/sync.go new file mode 100644 index 0000000..47fb248 --- /dev/null +++ b/resources/mmd/sync.go @@ -0,0 +1,98 @@ +package mmd + +import ( + "context" + "fmt" + "sort" + "strconv" + "strings" + "time" + + "github.com/cloudquery/plugin-sdk/plugins/source" + "github.com/cloudquery/plugin-sdk/schema" + "github.com/koltyakov/cq-source-sharepoint/internal/util" + "github.com/thoas/go-funk" 
+) + +func (m *MMD) Sync(ctx context.Context, metrics *source.TableClientMetrics, res chan<- *schema.Resource, table *schema.Table) error { + opts := m.TablesMap[table.Name] + logger := m.logger.With().Str("table", table.Name).Logger() + + taxonomy := m.sp.Taxonomy() + terms, err := taxonomy.Stores().Default().Sets().GetByID(opts.ID).GetAllTerms() + if err != nil { + metrics.Errors++ + return fmt.Errorf("failed to get items: %w", err) + } + + for _, itemMap := range terms { + ks := funk.Keys(itemMap).([]string) + sort.Strings(ks) + logger.Debug().Strs("keys", ks).Msg("item keys") + + colVals := make([]any, len(table.Columns)) + + for i, col := range table.Columns { + prop := opts.FieldsMap[col.Name] + colVals[i] = getRespValByProp(itemMap, prop) + } + + resource, err := resourceFromValues(table, colVals) + if err != nil { + metrics.Errors++ + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + case res <- resource: + metrics.Resources++ + } + } + + return nil +} + +func resourceFromValues(table *schema.Table, values []any) (*schema.Resource, error) { + resource := schema.NewResourceData(table, nil, values) + for i, col := range table.Columns { + if err := resource.Set(col.Name, values[i]); err != nil { + return nil, err + } + } + return resource, nil +} + +func getRespValByProp(resp map[string]any, prop string) any { + val := util.GetRespValByProp(resp, prop) + if prop == "Id" { + return strings.ReplaceAll(strings.ReplaceAll(val.(string), "/Guid(", ""), ")/", "") + } + if prop == "CreatedDate" || prop == "LastModifiedDate" { + dateStr := strings.ReplaceAll(strings.ReplaceAll(val.(string), "/Date(", ""), ")/", "") + dateInt, _ := strconv.ParseInt(dateStr, 10, 64) + return time.UnixMilli(dateInt) + } + if prop == "PathOfTerm" { + return strings.Split(val.(string), ";") + } + if prop == "MergedTermIds" { + mergedTerms := val.([]any) + for i, term := range mergedTerms { + mergedTerms[i] = strings.ReplaceAll(strings.ReplaceAll(term.(string), "/Guid(", 
""), ")/", "") + } + return mergedTerms + } + if prop == "CustomSortOrder" { + if val == nil { + return nil + } + sortedTerms := strings.Split(val.(string), ":") + for i, term := range sortedTerms { + sortedTerms[i] = strings.ReplaceAll(strings.ReplaceAll(term, "/Guid(", ""), ")/", "") + } + return sortedTerms + } + return val +}