load.go
package loader

import (
	"context"
	_ "embed"
	"fmt"
	"os"

	"cloud.google.com/go/bigquery"
)

// JSON schemas for the BigQuery tables, embedded into the binary at build time.

//go:embed dynamic-analysis-schema.json
var dynamicAnalysisSchemaJSON []byte

//go:embed static-analysis-schema.json
var staticAnalysisSchemaJSON []byte

// PubSubMessage is the payload of the Pub/Sub event that triggers a load.
type PubSubMessage struct {
	Data []byte `json:"data"`
}

// runAndWaitForJob starts the given BigQuery load job and blocks until it
// completes, logging any row-level errors reported by the job.
func runAndWaitForJob(ctx context.Context, loader *bigquery.Loader) error {
	job, err := loader.Run(ctx)
	if err != nil {
		return fmt.Errorf("failed to create load job: %w", err)
	}
	fmt.Printf("load job created: %s\n", job.ID())

	status, err := job.Wait(ctx)
	if err != nil {
		return fmt.Errorf("error waiting for job: %w", err)
	}
	if status.Err() != nil {
		fmt.Printf("job completed with %d errors\n", len(status.Errors))
		for idx, err := range status.Errors {
			fmt.Printf("error %d: %v\n", idx, err)
		}
		return status.Err()
	}
	return nil
}

// Load loads dynamic analysis results from the GCS bucket named by
// OSSF_MALWARE_ANALYSIS_RESULTS into the packages.analysis BigQuery table,
// replacing the table's existing contents.
func Load(ctx context.Context, m PubSubMessage) error {
	project := os.Getenv("GCP_PROJECT")
	bucket := os.Getenv("OSSF_MALWARE_ANALYSIS_RESULTS")

	bq, err := bigquery.NewClient(ctx, project)
	if err != nil {
		return fmt.Errorf("failed to create BigQuery client: %w", err)
	}
	defer bq.Close()

	schema, err := bigquery.SchemaFromJSON(dynamicAnalysisSchemaJSON)
	if err != nil {
		return fmt.Errorf("failed to decode schema: %w", err)
	}

	// Load every JSON file in the bucket, tolerating up to 10000 bad records.
	gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/*.json", bucket))
	gcsRef.Schema = schema
	gcsRef.SourceFormat = bigquery.JSON
	gcsRef.MaxBadRecords = 10000

	// Overwrite the table, partitioned by day on CreatedTimestamp.
	dataset := bq.Dataset("packages")
	loader := dataset.Table("analysis").LoaderFrom(gcsRef)
	loader.WriteDisposition = bigquery.WriteTruncate
	loader.TimePartitioning = &bigquery.TimePartitioning{
		Type:  bigquery.DayPartitioningType,
		Field: "CreatedTimestamp",
	}
	return runAndWaitForJob(ctx, loader)
}

// LoadStaticAnalysis loads static analysis results from the GCS bucket named
// by OSSF_MALWARE_STATIC_ANALYSIS_RESULTS into the packages.staticanalysis
// BigQuery table, replacing the table's existing contents.
func LoadStaticAnalysis(ctx context.Context, m PubSubMessage) error {
	project := os.Getenv("GCP_PROJECT")
	bucket := os.Getenv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS")

	bq, err := bigquery.NewClient(ctx, project)
	if err != nil {
		return fmt.Errorf("failed to create BigQuery client: %w", err)
	}
	defer bq.Close()

	schema, err := bigquery.SchemaFromJSON(staticAnalysisSchemaJSON)
	if err != nil {
		return fmt.Errorf("failed to decode schema: %w", err)
	}

	// Load every JSON file in the bucket, tolerating up to 10000 bad records.
	gcsRef := bigquery.NewGCSReference(fmt.Sprintf("gs://%s/*.json", bucket))
	gcsRef.Schema = schema
	gcsRef.SourceFormat = bigquery.JSON
	gcsRef.MaxBadRecords = 10000

	// Overwrite the table, partitioned by day on the "created" field.
	dataset := bq.Dataset("packages")
	loader := dataset.Table("staticanalysis").LoaderFrom(gcsRef)
	loader.WriteDisposition = bigquery.WriteTruncate
	loader.TimePartitioning = &bigquery.TimePartitioning{
		Type:  bigquery.DayPartitioningType,
		Field: "created",
	}
	return runAndWaitForJob(ctx, loader)
}
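
// Usage sketch (an assumption, not part of the original file): Load and
// LoadStaticAnalysis match the background Cloud Function signature for
// Pub/Sub-triggered functions, so a hypothetical local smoke test could call
// them directly, provided GCP_PROJECT and the relevant bucket variable are
// set and application default credentials are available:
//
//	func main() {
//		if err := loader.Load(context.Background(), loader.PubSubMessage{}); err != nil {
//			log.Fatal(err)
//		}
//	}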