Skip to content

Commit

Permalink
feat: enhance repology exporter (#714)
Browse files Browse the repository at this point in the history
  • Loading branch information
saenai255 authored Jan 20, 2024
1 parent 12a2bd1 commit 4fa5dc6
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 22 deletions.
2 changes: 2 additions & 0 deletions server/model/repology_project.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package model

// RepologyProjectTableName is the name of the database table holding
// RepologyProject rows.
// NOTE(review): presumably this matches the table name GORM derives for
// RepologyProject — confirm against the naming strategy in use.
const RepologyProjectTableName = "repology_projects"

// RepologyProject is the GORM model for a single Repology project.
// Name is the project's name and serves as the primary key.
type RepologyProject struct {
Name string `gorm:"primaryKey"`
}
Expand Down
4 changes: 2 additions & 2 deletions server/repology/internal/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package internal

import (
"encoding/json"
"io/ioutil"
"io"
"net/http"
"net/url"
)
Expand All @@ -26,7 +26,7 @@ func getProjectSearch(projectName string) (RepologyApiProjectSearchResponse, err
return response, err
}

body, err := ioutil.ReadAll(resp.Body)
body, err := io.ReadAll(resp.Body)
if err != nil {
return response, err
}
Expand Down
76 changes: 58 additions & 18 deletions server/repology/internal/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package internal
import (
"errors"
"fmt"
"sync"
"strings"
"time"

"gorm.io/gorm"
Expand All @@ -19,20 +19,29 @@ func ExportRepologyDatabase(db *gorm.DB) error {

it := 1
lastProjectName := ""

const REPOLOGY_PROJECT_FETCH_THROTTLE = 400 * time.Millisecond

lastRepoFetch := time.Now()

for {
delay := makeSecondDelay()
defer delay.Wait()
if time.Since(lastRepoFetch) < REPOLOGY_PROJECT_FETCH_THROTTLE {
time.Sleep(REPOLOGY_PROJECT_FETCH_THROTTLE - time.Since(lastRepoFetch))
}

log.Debug("page %v | cursor at: %v", it, lastProjectName)
projectPage, err := getProjectSearch(lastProjectName)
if err != nil {
return errors.Join(errors.New("failed to fetch repology project page"), err)
}

var projectProviders []model.RepologyProjectProvider
lastRepoFetch = time.Now()

var projects []model.RepologyProject
var projectProviders []model.RepologyProjectProvider
for projectName, apiProjectProvider := range projectPage {
lastProjectName = projectName

lastProjectName = identityOrSkipProject(projectName)
for _, apiProjectProvider := range apiProjectProvider {
// Save project provider as inactive
projectProvider := mapRepologyApiProjectProviderToModel(projectName, apiProjectProvider)
Expand Down Expand Up @@ -64,12 +73,12 @@ func ExportRepologyDatabase(db *gorm.DB) error {
}

// Delete active (old) repology project providers
if err := db.Where(fmt.Sprintf("%v = ?", model.RepologyProjectProviderColumns.Active), true).Delete(&model.RepologyProjectProvider{}).Error; err != nil {
if err := db.Debug().Where(fmt.Sprintf("%v = ?", model.RepologyProjectProviderColumns.Active), true).Delete(&model.RepologyProjectProvider{}).Error; err != nil {
return errors.Join(errors.New("failed to delete old repology project providers"), err)
}

// Mark new repology project providers as active
if err := db.Exec(
if err := db.Debug().Exec(
fmt.Sprintf(
"UPDATE %s SET %s = 1",
model.RepologyProjectProviderTableName,
Expand All @@ -82,32 +91,63 @@ func ExportRepologyDatabase(db *gorm.DB) error {
return nil
}

// projectNamesToSkipToNextCussor maps a project-name prefix (a namespace
// followed by ':') to the value identityOrSkipProject returns in place of any
// project name that starts with that prefix.
//
// Every value is the namespace followed by "a". Because ':' (0x3A) sorts
// before 'a' (0x61) in byte order, each value compares greater than every
// name that begins with its prefix.
// NOTE(review): presumably this lets the export cursor jump past a whole
// namespace instead of paging through it — confirm against the ordering used
// by the Repology API.
var projectNamesToSkipToNextCussor = map[string]string{
"emacs:": "emacsa",
"go:": "goa",
"haskell:": "haskella",
"lisp:": "lispa",
"node:": "nodea",
"ocaml:": "ocamla",
"perl:": "perla",
"php:": "phpa",
"python:": "pythona",
"r:": "ra",
"ruby:": "rubya",
"rust:": "rusta",
"texlive:": "texlivea",
}

// identityOrSkipProject returns the replacement registered in
// projectNamesToSkipToNextCussor for the prefix that name starts with, or
// name itself when no registered prefix matches.
func identityOrSkipProject(name string) string {
	for namespace, cursor := range projectNamesToSkipToNextCussor {
		if !strings.HasPrefix(name, namespace) {
			continue
		}

		// name belongs to a registered namespace: hand back its replacement.
		return cursor
	}

	// No registered prefix matched; the name is used as-is.
	return name
}

// migrateTables auto-migrates the RepologyProject and RepologyProjectProvider
// schemas, truncating each backing table immediately after its migration.
// It stops at the first failure and returns that error as-is.
func migrateTables(db *gorm.DB) error {
	// Order matters: each table is migrated and then truncated before the
	// next one is touched.
	steps := []struct {
		schema any
		table  string
	}{
		{&model.RepologyProject{}, model.RepologyProjectTableName},
		{&model.RepologyProjectProvider{}, model.RepologyProjectProviderTableName},
	}

	for _, step := range steps {
		if err := db.AutoMigrate(step.schema); err != nil {
			return err
		}

		if err := truncateTable(db, step.table); err != nil {
			return err
		}
	}

	return nil
}

func makeSecondDelay() *sync.WaitGroup {
var delay sync.WaitGroup
delay.Add(1)

go func() {
defer delay.Done()
// Wait 750ms before making another request
// Repology API has a rate limit of 1 request per second but some requests take longer than 1 second so it averages out
time.Sleep(750 * time.Millisecond)
}()
func truncateTable(db *gorm.DB, tableName string) error {
log.Debug("attempting to truncate table %v", tableName)
err := db.Exec("TRUNCATE TABLE " + tableName).Error
if err != nil {
return errors.Join(fmt.Errorf("failed to truncate table %v", tableName), err)
}

return &delay
log.Info("successfully truncated table %v", tableName)
return nil
}

func mapRepologyApiProjectProviderToModel(projectName string, apiProjectProvider RepologyApiProject) model.RepologyProjectProvider {
Expand Down
4 changes: 2 additions & 2 deletions server/repology/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ import (
)

func ScheduleRefresh(every time.Duration) {
db := model.Instance()
go func() {

for {
db := model.Instance()
log.Info("refreshing Repology database...")
err := ExportRepologyDatabase(db)
if err != nil {
log.Error("failed to export Repology projects: %v", err)
} else {
log.Info("repology database refreshed successfully")
}

time.Sleep(every)
}
}()
Expand Down

0 comments on commit 4fa5dc6

Please sign in to comment.