Skip to content

Commit

Permalink
MMD support, #12
Browse files Browse the repository at this point in the history
  • Loading branch information
koltyakov committed Feb 28, 2023
1 parent de55652 commit 9ab5b5f
Show file tree
Hide file tree
Showing 13 changed files with 291 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
".vscode/": true,
"**/*.log": true,
"**/db.sql": true,
"**/.cq/": true
"**/.cq/": true,
"db.sql-journal": true
},
"editor.formatOnSave": true,
"editor.minimap.enabled": false
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 1.2.0 (2023-02-28)

### Features

- Managed Metadata support

## 1.1.0 (2023-02-26)

### Features
Expand Down
31 changes: 26 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
## Roadmap

- [x] Lists and Document Libraries data
- [ ] [Managed Metadata data](https://github.com/koltyakov/cq-source-sharepoint/issues/12)
- [x] [Managed Metadata terms](https://github.com/koltyakov/cq-source-sharepoint/issues/12)
- [ ] [User Profile Service data](https://github.com/koltyakov/cq-source-sharepoint/issues/13)
- [ ] [Search queries data](https://github.com/koltyakov/cq-source-sharepoint/issues/14)
- [ ] Content types based rollup
Expand All @@ -36,7 +36,7 @@ spec:
name: "sharepoint"
registry: "github"
path: "koltyakov/sharepoint"
version: "v1.1.0" # provide the latest stable version
version: "v1.2.0" # provide the latest stable version
destinations: ["postgresql"] # provide the list of used destinations
spec:
# Spec is mandatory
Expand Down Expand Up @@ -68,15 +68,15 @@ SharePoint On-Premise auth is also supported, based on your farm configuration y

### Entities configuration

So far, the plugin supports lists and document libraries data fetching. Based on feedback and use cases, we have a strategy for extending the plugin to support other SharePoint API entities, e.g. Managed Metadata, UPS, etc.
So far, the plugin supports fetching lists, document libraries, and managed metadata. Based on feedback and use cases, we have a strategy for extending the plugin to support other SharePoint API entities.

A single source `yml` configuration assumes fetching data from a single SharePoint site. If you need to fetch data from multiple sites, you can create multiple source configurations.

```yaml
# sharepoint.yml
# ...
spec:
# A map of URIs to the list configuration
# A map of URIs with the list configurations
# If no lists are provided, nothing will be fetched
lists:
# List or Document library URI - a relative path without a site URL
Expand Down Expand Up @@ -105,6 +105,14 @@ spec:
Lists/AnotherList:
select:
- Title
# A map of MMD term set IDs (GUIDs) to their configurations
# If no term sets are provided, no terms will be fetched
mmd:
# Term set ID
8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f:
# Optional, an alias for the table name
# the name of the alias is prefixed with `mmd_`
alias: "department"
```
#### User Information List
Expand Down Expand Up @@ -151,6 +159,19 @@ spec:
alias: "document"
```

### Managed Metadata

To configure managed metadata fetching, you need to provide a term set ID (GUID) and an optional alias for the table name.

```yaml
# sharepoint.yml
# ...
spec:
mmd:
8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f:
alias: "department"
```

## Get started

### Install CloudQuery
Expand Down Expand Up @@ -181,7 +202,7 @@ spec:
name: "sharepoint"
registry: "github"
path: "koltyakov/sharepoint"
version: "v1.1.0" # https://github.com/koltyakov/cq-source-sharepoint/releases
version: "v1.2.0" # https://github.com/koltyakov/cq-source-sharepoint/releases
destinations: ["sqlite"]
spec:
auth:
Expand Down
21 changes: 19 additions & 2 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ import (
"github.com/cloudquery/plugin-sdk/specs"
"github.com/koltyakov/cq-source-sharepoint/resources/auth"
"github.com/koltyakov/cq-source-sharepoint/resources/lists"
"github.com/koltyakov/cq-source-sharepoint/resources/mmd"
"github.com/rs/zerolog"
)

type Client struct {
Tables schema.Tables

lists *lists.Lists
mmd *mmd.MMD

source specs.Source
opts source.Options
Expand All @@ -39,17 +41,32 @@ func NewClient(_ context.Context, logger zerolog.Logger, src specs.Source, opts
// sp.Conf(&api.RequestConfig{Context: ctx}) // for some reason gets context cancelled immediately

client := &Client{
lists: lists.NewLists(sp, logger),
lists: lists.NewLists(sp, logger),
mmd: mmd.NewMMD(sp, logger),

source: src,
opts: opts,
}

client.Tables = make(schema.Tables, 0, len(spec.Lists))

// Managed metadata tables prepare
for termSetID, mmdSpec := range spec.MMD {
table, err := client.mmd.GetDestTable(termSetID, mmdSpec)
if err != nil {
return nil, fmt.Errorf("failed to get table from term set \"%s\": %w", termSetID, err)
}
if table != nil {
logger.Debug().Str("table", table.Name).Str("termset", termSetID).Str("columns", table.Columns.String()).Msg("columns for table")
client.Tables = append(client.Tables, table)
}
}

// Lists tables prepare
for listURI, listSpec := range spec.Lists {
table, err := client.lists.GetDestTable(listURI, listSpec)
if err != nil {
return nil, fmt.Errorf("failed to get table from list: %w", err)
return nil, fmt.Errorf("failed to get table from list \"%s\": %w", listURI, err)
}
if table != nil {
logger.Debug().Str("table", table.Name).Str("list", listURI).Str("columns", table.Columns.String()).Msg("columns for table")
Expand Down
16 changes: 16 additions & 0 deletions client/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/cloudquery/plugin-sdk/specs"
"github.com/koltyakov/cq-source-sharepoint/resources/auth"
"github.com/koltyakov/cq-source-sharepoint/resources/lists"
"github.com/koltyakov/cq-source-sharepoint/resources/mmd"
)

// Spec is the configuration for a SharePoint source
Expand All @@ -17,6 +18,9 @@ type Spec struct {
// A map of URIs to the list configuration
// If no lists are provided, nothing will be fetched
Lists map[string]lists.Spec `json:"lists"`

// A map of TermSets GUIDs to the MMD configuration
MMD map[string]mmd.Spec `json:"mmd"`
}

// SetDefaults sets default values for top level spec
Expand Down Expand Up @@ -52,6 +56,18 @@ func (s *Spec) Validate() error {
aliases[alias] = true
}

// All term sets should have unique aliases
for terSetID, mmdSpec := range s.MMD {
alias := strings.ToLower("mmd_" + mmdSpec.Alias)
if mmdSpec.Alias == "" {
alias = strings.ToLower("mmd_" + strings.ReplaceAll(terSetID, "-", ""))
}
if _, ok := aliases[alias]; ok {
return fmt.Errorf("duplicate alias \"%s\" for term set \"%s\" configuration", alias, terSetID)
}
aliases[alias] = true
}

return nil
}

Expand Down
15 changes: 13 additions & 2 deletions client/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,23 @@ func (c *Client) Sync(ctx context.Context, metrics *source.Metrics, res chan<- *
}
}

// ToDo: Separate tables for different types of resources
for _, table := range c.Tables {
// Lists sync
for tableName := range c.lists.TablesMap {
table := c.Tables.Get(tableName)
m := metrics.TableClient[table.Name][c.ID()]
if err := c.lists.Sync(ctx, m, res, table); err != nil {
return fmt.Errorf("syncing table %s: %w", table.Name, err)
}
}

// MMD (Terms from TermSets) sync
for tableName := range c.mmd.TablesMap {
table := c.Tables.Get(tableName)
m := metrics.TableClient[table.Name][c.ID()]
if err := c.mmd.Sync(ctx, m, res, table); err != nil {
return fmt.Errorf("syncing table %s: %w", table.Name, err)
}
}

return nil
}
3 changes: 2 additions & 1 deletion debug/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ db/
*.log
.cq/
db.sql
.env
.env
db.sql-journal
3 changes: 3 additions & 0 deletions debug/sharepoint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,6 @@ spec:
- OrderDate
- Total
alias: "order"
mmd:
8ed8c9ea-7052-4c1d-a4d7-b9c10bffea6f:
alias: "department"
6 changes: 3 additions & 3 deletions resources/lists/lists.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type Lists struct {
sp *api.SP
logger zerolog.Logger

tablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model)
TablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model)
}

type Model struct {
Expand All @@ -28,7 +28,7 @@ func NewLists(sp *api.SP, logger zerolog.Logger) *Lists {
return &Lists{
sp: sp,
logger: logger,
tablesMap: map[string]Model{},
TablesMap: map[string]Model{},
}
}

Expand Down Expand Up @@ -107,7 +107,7 @@ func (l *Lists) GetDestTable(listURI string, spec Spec) (*schema.Table, error) {
model.FieldsMap[col.Name] = prop
}

l.tablesMap[table.Name] = *model
l.TablesMap[table.Name] = *model

return table, nil
}
Expand Down
2 changes: 1 addition & 1 deletion resources/lists/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
)

func (l *Lists) Sync(ctx context.Context, metrics *source.TableClientMetrics, res chan<- *schema.Resource, table *schema.Table) error {
opts := l.tablesMap[table.Name]
opts := l.TablesMap[table.Name]
logger := l.logger.With().Str("table", table.Name).Logger()

logger.Debug().Strs("cols", opts.Spec.Select).Msg("selecting columns from list")
Expand Down
91 changes: 91 additions & 0 deletions resources/mmd/mmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package mmd

import (
"strings"

"github.com/cloudquery/plugin-sdk/schema"
"github.com/koltyakov/cq-source-sharepoint/internal/util"
"github.com/koltyakov/gosip/api"
"github.com/rs/zerolog"
)

// MMD prepares destination tables for Managed Metadata term sets and keeps
// the per-table metadata registered by GetDestTable.
type MMD struct {
// sp is the gosip SharePoint API client passed to NewMMD.
sp *api.SP
// logger is the logger passed to NewMMD.
logger zerolog.Logger

// TablesMap is keyed by the destination table name; entries are added by GetDestTable.
TablesMap map[string]Model // normalized table name to table metadata (map[CQ Table Name]Model)
}

// Model is the metadata stored per destination table for a term set.
type Model struct {
// ID is the term set ID the table was created for.
ID string
// Spec is the term set configuration the table was created with.
Spec Spec
// FieldsMap maps a destination column name to a term property name,
// e.g. "tagging" -> "IsAvailableForTagging".
FieldsMap map[string]string // cq column name to column metadata
}

// NewMMD returns an MMD bound to the given SharePoint API client and logger,
// with an empty, ready-to-use TablesMap.
func NewMMD(sp *api.SP, logger zerolog.Logger) *MMD {
	m := new(MMD)
	m.sp = sp
	m.logger = logger
	m.TablesMap = make(map[string]Model)
	return m
}

// GetDestTable builds the destination table definition for a term set and
// registers its Model in m.TablesMap under the resulting table name.
//
// The table name is "sharepoint_mmd_" followed by the normalized spec.Alias,
// or by the normalized term set ID with dashes removed when no alias is set.
// The returned error is always nil in the current implementation.
func (m *MMD) GetDestTable(terSetID string, spec Spec) (*schema.Table, error) {
	// ToDo: ${TermGroup}_${TermSetName}
	entity := spec.Alias
	if entity == "" {
		entity = strings.ReplaceAll(terSetID, "-", "")
	}

	// Single source for both the table columns and the column-to-property map,
	// so the two cannot drift apart.
	fields := []struct {
		column schema.Column
		prop   string
	}{
		{schema.Column{Name: "id", Type: schema.TypeUUID, CreationOptions: schema.ColumnCreationOptions{PrimaryKey: true}}, "Id"},
		{schema.Column{Name: "name", Type: schema.TypeString}, "Name"},
		{schema.Column{Name: "description", Type: schema.TypeString}, "Description"},
		{schema.Column{Name: "tagging", Type: schema.TypeBool}, "IsAvailableForTagging"},
		{schema.Column{Name: "deprecated", Type: schema.TypeBool}, "IsDeprecated"},
		{schema.Column{Name: "pinned", Type: schema.TypeBool}, "IsPinned"},
		{schema.Column{Name: "reused", Type: schema.TypeBool}, "IsReused"},
		{schema.Column{Name: "root", Type: schema.TypeBool}, "IsRoot"},
		{schema.Column{Name: "source", Type: schema.TypeBool}, "IsSourceTerm"},
		{schema.Column{Name: "path", Type: schema.TypeStringArray}, "PathOfTerm"},
		{schema.Column{Name: "children", Type: schema.TypeInt}, "TermsCount"},
		{schema.Column{Name: "merged", Type: schema.TypeUUIDArray}, "MergedTermIds"},
		{schema.Column{Name: "shared_props", Type: schema.TypeJSON}, "CustomProperties"},
		{schema.Column{Name: "local_props", Type: schema.TypeJSON}, "LocalCustomProperties"},
		{schema.Column{Name: "custom_sort", Type: schema.TypeUUIDArray}, "CustomSortOrder"},
		{schema.Column{Name: "owner", Type: schema.TypeString}, "Owner"},
		{schema.Column{Name: "created", Type: schema.TypeTimestamp}, "CreatedDate"},
		{schema.Column{Name: "modified", Type: schema.TypeTimestamp}, "LastModifiedDate"},
	}

	columns := make([]schema.Column, 0, len(fields))
	fieldsMap := make(map[string]string, len(fields))
	for _, f := range fields {
		columns = append(columns, f.column)
		fieldsMap[f.column.Name] = f.prop
	}

	table := &schema.Table{
		Name:        "sharepoint_mmd_" + util.NormalizeEntityName(entity),
		Description: "", // TermSetName
		Columns:     columns,
	}

	// ToDo: Remove this reverse mapping
	m.TablesMap[table.Name] = Model{
		ID:        terSetID,
		Spec:      spec,
		FieldsMap: fieldsMap,
	}

	return table, nil
}
11 changes: 11 additions & 0 deletions resources/mmd/spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package mmd

// Spec is the configuration for MMD term set source
type Spec struct {
// Optional, an alias for the table name
// Don't map different term sets to the same table - such a scenario is not supported
Alias string `json:"alias"`
}

// SetDefault sets default values for MMD term set spec
// func (s *Spec) SetDefault() {}
Loading

0 comments on commit 9ab5b5f

Please sign in to comment.