Skip to content

Commit

Permalink
Modified the KaiserURLWebscraper to click buttons and expand the desi…
Browse files Browse the repository at this point in the history
…red web content before scraping
  • Loading branch information
rishi-salunkhe-mettle committed Aug 26, 2024
1 parent ccc02bb commit ea348fd
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ var anthemURL = "https://patient360.anthem.com/P360Member/fhir"
// var guidewellURL = "https://developer.bcbsfl.com/interop/interop-developer-portal/product/469/api/466#/PatientAccessAPI_105/overview"
// var hcscURL = "https://interoperability.hcsc.com/s/patient-access-api"
// var humanaURL = "https://developers.humana.com/patient-api/doc"
// var kaiserURL = "https://developer.kp.org/#/apis/639c015049655aa96ab5b2f1"
var kaiserURL = "https://developer.kp.org/#/apis/639c015049655aa96ab5b2f1"

// var molinaURL = "https://developer.interop.molinahealthcare.com/api-details#api=patient-access&operation=5f72ab665269f310ef58b361"
var unitedHealthURL = "https://www.uhc.com/legal/interoperability-apis"
var meldrxURL = "https://app.meldrx.com/api/Directories/fhir/endpoints"
Expand Down Expand Up @@ -512,8 +513,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
// HcscURLWebscraper(chplURL, fileToWriteTo)
// } else if URLsEqual(chplURL, humanaURL) {
// HumanaURLWebscraper(chplURL, fileToWriteTo)
//} else if URLsEqual(chplURL, kaiserURL) {
//KaiserURLWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, kaiserURL) {
KaiserURLWebscraper(chplURL, fileToWriteTo)
// } else if URLsEqual(chplURL, molinaURL) {
// MolinaURLWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, unitedHealthURL) {
Expand Down
59 changes: 56 additions & 3 deletions endpointmanager/pkg/chplendpointquerier/kaiserwebscraper.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package chplendpointquerier

import (
"context"
"strings"
"time"

"github.com/PuerkitoBio/goquery"
"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
"github.com/chromedp/chromedp"
log "github.com/sirupsen/logrus"
)

Expand All @@ -13,7 +15,7 @@ func KaiserURLWebscraper(CHPLURL string, fileToWriteTo string) {
var lanternEntryList []LanternEntry
var endpointEntryList EndpointList

doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, ".language-json")
doc, err := KaiserChromedpQueryEndpointList(CHPLURL, ".opblock-tag-section")
if err != nil {
log.Fatal(err)
}
Expand All @@ -25,9 +27,10 @@ func KaiserURLWebscraper(CHPLURL string, fileToWriteTo string) {
found = true
}
if found {
if strings.Contains(spanhtml.Text(), "/FHIR/api") {
if strings.HasSuffix(spanhtml.Text(), "/FHIR/api\"") {
var entry LanternEntry
URL := strings.TrimSpace(spanhtml.Text())
URL = strings.ReplaceAll(URL, "\"", "")
entry.URL = URL
processed = true
lanternEntryList = append(lanternEntryList, entry)
Expand All @@ -49,3 +52,53 @@ func KaiserURLWebscraper(CHPLURL string, fileToWriteTo string) {
}

}

// KaiserChromedpQueryEndpointList loads endpointListURL in a chromedp-driven
// browser and returns the rendered page parsed as a goquery document.
//
// When waitVisibleElement is non-empty, the function first waits for that CSS
// selector to become visible, then clicks the `.expand-operation` and
// `.opblock-summary-control` elements and waits for a `.language-json` element
// to be visible before capturing the HTML. When waitVisibleElement is empty,
// the HTML is captured immediately after navigation.
//
// The whole browser run is bounded by a single 90 second timeout. Any error
// from chromedp or from parsing the captured HTML is returned with a nil
// document.
func KaiserChromedpQueryEndpointList(endpointListURL string, waitVisibleElement string) (*goquery.Document, error) {
	browserCtx, cancelBrowser := chromedp.NewContext(context.Background())
	defer cancelBrowser()

	// One deadline covers navigation, every wait/click step and the HTML capture.
	runCtx, cancelRun := context.WithTimeout(browserCtx, 90*time.Second)
	defer cancelRun()

	var pageHTML string

	// Build the action sequence once; the interactive steps are only added
	// when the caller supplied an element to wait for.
	actions := []chromedp.Action{
		chromedp.Navigate(endpointListURL),
	}

	if waitVisibleElement != "" {
		actions = append(actions,
			// Give the page's javascript time to render the requested element.
			chromedp.WaitVisible(waitVisibleElement, chromedp.ByQuery),

			// Expand the Metadata section
			chromedp.WaitVisible(`.expand-operation`),
			chromedp.Click(`.expand-operation`, chromedp.ByQuery),

			// Expand the Metadata endpoint section
			chromedp.WaitVisible(`.opblock-summary-control`),
			chromedp.Click(`.opblock-summary-control`, chromedp.ByQuery),

			// Wait till the code snippet is rendered
			chromedp.WaitVisible(`.language-json`),
		)
	}

	// Capturing the full document is always the final step.
	actions = append(actions, chromedp.OuterHTML("html", &pageHTML, chromedp.ByQuery))

	if runErr := chromedp.Run(runCtx, actions...); runErr != nil {
		return nil, runErr
	}

	parsed, parseErr := goquery.NewDocumentFromReader(strings.NewReader(pageHTML))
	if parseErr != nil {
		return nil, parseErr
	}

	return parsed, nil
}

0 comments on commit ea348fd

Please sign in to comment.