From 78c446e5a31c45bd5d717e692885d24ca06d796d Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Sat, 7 Sep 2024 17:11:23 +0800 Subject: [PATCH] Add broken dependency analysis --- .gitignore | 2 + server/endpoint_packages.pas | 214 +++++++++++++++++++++++++++++--- server/package_spider.pas | 126 +++++++++++-------- server/web/packages-broken.html | 23 ++++ server/web/packages-home.html | 2 +- server/web/packages-search.html | 2 +- 6 files changed, 297 insertions(+), 72 deletions(-) create mode 100644 server/web/packages-broken.html diff --git a/.gitignore b/.gitignore index a49a6906e..2ebbd3abe 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,5 @@ exec/cert/ *.fpc up_props.sh + +server/web/fhirserver.web diff --git a/server/endpoint_packages.pas b/server/endpoint_packages.pas index 105aa8ca9..f7f62e67a 100644 --- a/server/endpoint_packages.pas +++ b/server/endpoint_packages.pas @@ -77,7 +77,7 @@ TFHIRPackageWebServer = class (TFhirWebServerEndpoint) FNextScan : TDateTIme; FScanning: boolean; FSystemToken : String; - FCrawlerLog : String; + FCrawlerLog : TJsonObject; procedure setDB(value : TFDBManager); function status : String; @@ -95,7 +95,8 @@ TFHIRPackageWebServer = class (TFhirWebServerEndpoint) procedure serveSearch(name, canonicalPkg, canonicalUrl, FHIRVersion, dependency, sort : String; secure : boolean; request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo); procedure serveUpdates(date : TFslDateTime; secure : boolean; response : TIdHTTPResponseInfo); procedure serveProtectForm(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; id : String); - procedure serveLog(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; id : String); + procedure serveLog(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo); + procedure serveBroken(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; filter : String); procedure serveUpload(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; secure : boolean; id : 
String); procedure processProtectForm(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; id, pword : String); procedure SetScanning(const Value: boolean); @@ -421,7 +422,6 @@ procedure TPackageUpdaterThread.RunUpdater; try upd := TPackageUpdater.Create(FZulip.link); try - upd.CrawlerLog := TFslStringBuilder.create; upd.OnSendEmail := doSendEmail; try upd.update(conn); @@ -437,7 +437,7 @@ procedure TPackageUpdaterThread.RunUpdater; Logging.log('Exception updating packages: '+e.Message); end; end; - FEndPoint.FPackageServer.FCrawlerLog := upd.CrawlerLog.AsString; + FEndPoint.FPackageServer.FCrawlerLog := upd.CrawlerLog.Link; finally upd.free; end; @@ -811,20 +811,98 @@ procedure TFHIRPackageWebServer.serveProtectForm(request : TIdHTTPRequestInfo; r end; end; -procedure TFHIRPackageWebServer.serveLog(request: TIdHTTPRequestInfo; response: TIdHTTPResponseInfo; id: String); +function colorForStatus(s : String) : String; +begin + if s = 'error' then + result := 'maroon' + else if s = 'warning' then + result := 'navy' + else + result := 'black' +end; + +procedure TFHIRPackageWebServer.serveLog(request: TIdHTTPRequestInfo; response: TIdHTTPResponseInfo); var vars : TFslMap; + html : TFslStringBuilder; + feed, item : TJsonObject; + msgs : TJsonArray; + allOK : boolean; + i : integer; begin response.ResponseNo := 200; response.ResponseText := 'OK'; - vars := TFslMap.Create; - try - vars.add('prefix', TFHIRObjectText.Create(AbsoluteUrl(false))); - vars.add('ver', TFHIRObjectText.Create('4.0.1')); - vars.add('log', TFHIRObjectText.Create(FCrawlerLog)); - returnFile(request, response, nil, request.Document, 'packages-log.html', false, vars); - finally - vars.free; + + if (request.Accept.contains('/html')) then + begin + html := TFslStringBuilder.create; + try + if FCrawlerLog.has('status') then + html.append('

The Crawler has not yet completed processing the feeds

'#13#10) + else + begin + html.append('

Feeds from '+FormatTextToHTML(FCrawlerLog['master'])+' ('+FormatTextToHTML(FCrawlerLog['run-time'])+')

'#13#10); + for feed in FCrawlerLog.arr['feeds'].asObjects do + begin + html.append('

'+FormatTextToHTML(feed['url'])+' ('+FormatTextToHTML(feed['fetch-time'])+')

'#13#10); + html.append('
    '#13#10); + allOk := true; + for item in feed.arr['items'].asObjects do + begin + if item['status'] = 'Already Processed' then + begin + // nothing + end + else + begin + html.append('
  • '+item['guid']+': '); + + if item['status'] = 'Fetched' then + html.append(''); + end; + html.append(item['status']+''); + if (item.has('messages')) then + begin + msgs := item.arr['messages']; + if (msgs.Count = 1) then + html.append('. '+FormatTextToHTML(msgs.Obj[0]['message'])+'') + else + begin + html.append('
      '); + for i := 0 to msgs.Count - 1 do + html.append('
    • '+FormatTextToHTML(msgs.Obj[i]['message'])+'
    • '); + html.append('
    '#13#10); + end + end; + html.append('
  • '#13#10); + end; + end; + if (allOK) then + html.append('
  • All OK
  • '#13#10); + html.append('
'#13#10); + end; + end; + vars := TFslMap.Create; + try + vars.add('prefix', TFHIRObjectText.Create(AbsoluteUrl(false))); + vars.add('ver', TFHIRObjectText.Create('4.0.1')); + vars.add('log', TFHIRObjectText.Create(html.ToString)); + returnFile(request, response, nil, request.Document, 'packages-log.html', false, vars); + finally + vars.free; + end + finally + html.free; + end; + end + else + begin + response.ContentType := 'application/json'; + response.ContentText := TJsonWriterDirect.writeObjectStr(FCrawlerLog, true); end; end; @@ -930,6 +1008,107 @@ procedure TFHIRPackageWebServer.serveVersions(id, sort : String; secure : boolea end; end; +procedure TFHIRPackageWebServer.serveBroken(request : TIdHTTPRequestInfo; response : TIdHTTPResponseInfo; filter : String); +var + conn : TFDBConnection; + json, v, dist: TJsonObject; + src, name, dep, ver : String; + vars : TFslMap; + list : TJsonArray; + html : TFslStringBuilder; + i : integer; + ids : TStringList; +begin + conn := FDB.getConnection('Package.server.broken'); + try + // conn.sql := 'select Id || ''#'' || version as Source, Dependency from PackageDependencies, PackageVersions where PackageDependencies.PackageVersionKey = PackageVersions.PackageVersionKey and Dependency not in (select Id || ''#'' || version from PackageVersions) order by Source'; + json := TJsonObject.Create; + try + ids := TStringList.create; + try + conn.sql := 'select Id, Version from PackageVersions'; + conn.prepare; + conn.Execute; + while conn.FetchNext do + ids.add(conn.ColStringByName['Id']+'#'+ TSemanticVersion.getMajMin(conn.ColStringByName['Version'])); + conn.terminate; + ids.sort; + conn.sql := 'select Id || ''#'' || version as Source, Dependency from PackageDependencies, PackageVersions where PackageDependencies.PackageVersionKey = PackageVersions.PackageVersionKey'; + conn.prepare; + conn.Execute; + while conn.FetchNext do + begin + if (filter = '') or (conn.ColStringByName['Source'].contains(filter)) then + begin + dep := 
conn.ColStringByName['Dependency']; + ver := TSemanticVersion.getMajMin(dep.substring(dep.indexOf('#')+1)); + if ids.IndexOf(dep.substring(0, dep.indexOf('#')+1)+ver) = -1 then + begin + list := json.forceArr[conn.ColStringByName['Source']]; + list.add(conn.ColStringByName['Dependency']); + end; + end; + end; + finally + ids.free; + end; + + response.ResponseNo := 200; + response.ResponseText := 'OK'; + if (request.Accept.contains('/html')) then + begin + html := TFslStringBuilder.create; + try + html.Append(''#13#10); + html.Append(''#13#10); + for name in json.properties.SortedKeys do + begin + list := json.arr[name]; + html.Append(''#13#10); + end; + html.Append('
Source PackageBroken Dependencies
'+name+''); + for i := 0 to list.Count - 1 do + begin + if i > 0 then + html.append(', '); + html.append(list.Value[i]); + end; + html.append('
'#13#10); + vars := TFslMap.Create('vars'); + try + vars.add('prefix', TFHIRObjectText.Create(AbsoluteUrl(false))); + vars.add('ver', TFHIRObjectText.Create('4.0.1')); + vars.add('filter', TFHIRObjectText.Create(FormatTextToHTML(filter))); + vars.add('table', TFHIRObjectText.Create(html.ToString)); + vars.add('status', TFHIRObjectText.Create(status)); + returnFile(request, response, nil, request.Document, 'packages-broken.html', false, vars); + finally + vars.free; + end; + finally + html.free; + end; + end + else + begin + json.str['date'] := FormatDateTime('c', now); + response.ContentType := 'application/json'; + response.ContentText := TJsonWriterDirect.writeObjectStr(json, true); + end; + finally + json.free; + end; + conn.terminate; + conn.release; + except + on e : Exception do + begin + conn.error(e); + raise; + end; + end; +end; + function sel(this, that : String) : string; begin if (this = that) then @@ -1374,7 +1553,11 @@ function TFHIRPackageWebServer.doRequest(AContext: TIdContext; request: TIdHTTPR end else if (request.CommandType = hcGET) and (request.Document = '/packages/log') then begin - serveLog(request, response, pm['id']); + serveLog(request, response); + end + else if (request.CommandType = hcGET) and (request.Document = '/packages/broken') then + begin + serveBroken(request, response, pm['filter']); end else if (request.CommandType = hcGET) and (request.Document = '/packages/protect') then begin @@ -1436,7 +1619,8 @@ function TFHIRPackageWebServer.doRequest(AContext: TIdContext; request: TIdHTTPR constructor TFHIRPackageWebServer.Create(code, path: String; common: TFHIRWebServerCommon); begin inherited Create(code, path, common); - FCrawlerLog := 'The Crawler has not yet completed processing the feed'; + FCrawlerLog := TJsonObject.create; + FCrawlerLog['status'] := 'No crawl has completed yet'; end; function TFHIRPackageWebServer.SecureRequest(AContext: TIdContext; ip : String; request: TIdHTTPRequestInfo; response: TIdHTTPResponseInfo; 
cert: TIdOpenSSLX509; id: String; tt : TTimeTracker): String; diff --git a/server/package_spider.pas b/server/package_spider.pas index cf7d3dff2..8dd9e9a78 100644 --- a/server/package_spider.pas +++ b/server/package_spider.pas @@ -93,21 +93,21 @@ TPackageUpdater = class (TFslObject) FTotalBytes : Cardinal; FIni : TIniFile; FZulip : TZulipTracker; - FCrawlerLog : TFslStringBuilder; - FHasErrors : boolean; - procedure clog(s : String; mode : TCrawlerLogMode); + FCrawlerLog : TJsonObject; + procedure DoSendEmail(dest, subj, body : String); procedure log(msg, source : String; error : boolean); + procedure clog(clItem : TJsonObject; level, msg : String); function fetchUrl(url, mimetype : string) : TBytes; function fetchJson(url : string) : TJsonObject; function fetchXml(url : string) : TMXmlElement; function hasStored(guid : String) : boolean; - procedure SetCrawlerLog(AValue: TFslStringBuilder); - procedure store(source, url, guid : String; date : TFslDateTime; package : Tbytes; idver : String); + procedure SetCrawlerLog(AValue: TJsonObject); + procedure store(source, url, guid : String; date : TFslDateTime; package : Tbytes; idver : String; clItem : TJsonObject); - procedure updateItem(source : String; item : TMXmlElement; i : integer; pr : TPackageRestrictions); + procedure updateItem(source : String; item : TMXmlElement; i : integer; pr : TPackageRestrictions; clFeed : TJsonObject); procedure updateTheFeed(url, source, email : String; pr : TPackageRestrictions); public constructor Create(zulip : TZulipTracker); @@ -116,7 +116,7 @@ TPackageUpdater = class (TFslObject) procedure update(DB : TFDBConnection); property errors : String read FErrors; - property CrawlerLog : TFslStringBuilder read FCrawlerLog write SetCrawlerLog; + property CrawlerLog : TJsonObject read FCrawlerLog write SetCrawlerLog; property OnSendEmail : TSendEmailEvent read FOnSendEmail write FOnSendEmail; class procedure test(db : TFDBManager); @@ -191,6 +191,7 @@ constructor 
TPackageUpdater.Create(zulip: TZulipTracker); begin inherited Create; FZulip := zulip; + FCrawlerLog := TJsonObject.create; end; destructor TPackageUpdater.Destroy; @@ -200,21 +201,6 @@ destructor TPackageUpdater.Destroy; inherited; end; -procedure TPackageUpdater.clog(s: String; mode: TCrawlerLogMode); -begin - case mode of - clmStart: FCrawlerLog.append('

'+FormatTextToHTML(s)+'

'#13#10); - clmHeader: FCrawlerLog.append('

'+FormatTextToHTML(s)+'

'#13#10); - clmError: - begin - FCrawlerLog.append('
  • '+FormatTextToHTML(s)+'
  • '#13#10); - FHasErrors := true; - end; - clmWarning: FCrawlerLog.append('
  • '+FormatTextToHTML(s)+'
  • '#13#10); - clmNote: FCrawlerLog.append('
  • '+FormatTextToHTML(s)+'
  • '#13#10); - end; -end; - procedure TPackageUpdater.DoSendEmail(dest, subj, body: String); var dt : TDateTime; @@ -269,7 +255,7 @@ function TPackageUpdater.hasStored(guid: String): boolean; FDB.Terminate; end; -procedure TPackageUpdater.SetCrawlerLog(AValue: TFslStringBuilder); +procedure TPackageUpdater.SetCrawlerLog(AValue: TJsonObject); begin FCrawlerLog.free; FCrawlerLog:=AValue; @@ -332,12 +318,13 @@ class procedure TPackageUpdater.processURLs(npm : TNpmPackage; ts : TStringList) end; end; -procedure TPackageUpdater.store(source, url, guid: String; date : TFslDateTime; package: Tbytes; idver : String); +procedure TPackageUpdater.store(source, url, guid: String; date : TFslDateTime; package: Tbytes; idver : String; clItem : TJsonObject); var npm : TNpmPackage; id, version, description, canonical, fhirVersion : String; kind : TFHIRPackageKind; ts : TStringList; + cl : TJsonObject; begin if Logging.shuttingDown then Abort; @@ -348,7 +335,7 @@ procedure TPackageUpdater.store(source, url, guid: String; date : TFslDateTime; if (id+'#'+version <> idver) then begin log('Warning processing '+idver+': actually found '+id+'#'+version+' in the package', source, true); - clog(idver+': actually found '+id+'#'+version+' in the package', clmWarning); + clog(clItem, 'warning', 'actually found '+id+'#'+version+' in the package'); end; description := npm.description; @@ -357,7 +344,7 @@ procedure TPackageUpdater.store(source, url, guid: String; date : TFslDateTime; if npm.notForPublication then begin log('Warning processing '+idver+': this package is not suitable for publication (likely broken links)', source, true); - clog(idver+': not suitable for publication (likely broken links)', clmWarning); + clog(clItem, 'warning', 'not suitable for publication (likely broken links)'); end; fhirVersion := npm.fhirVersion; if not isValidPackageId(id) then @@ -367,7 +354,7 @@ procedure TPackageUpdater.store(source, url, guid: String; date : TFslDateTime; if (canonical = '') then begin 
log('Warning processing '+idver+': No canonical found in npm (from '+url+')', source, true); - clog(idver+': No canonical found in npm (from '+url+')', clmWarning); + clog(clItem, 'warning', 'No canonical found in npm (from '+url+')'); canonical := 'http://simplifier.net/packages/fictitious/'+id; end; if not isAbsoluteUrl(canonical) then @@ -411,16 +398,18 @@ procedure TPackageUpdater.update(DB : TFDBConnection); arr : TJsonArray; i : integer; pr : TPackageRestrictions; + start : UInt64; begin FIni := TIniFile.Create(tempFile('package-spider.ini')); try + start := GetTickCount64; log('Start Package Scan', '', false); FTotalBytes := 0; FErrors := ''; FDB := DB; try log('Fetch '+MASTER_URL, '', false); - clog('Master URL: '+MASTER_URL, clmStart); + FCrawlerLog.str['master'] := MASTER_URL; json := fetchJson(MASTER_URL); try pr := TPackageRestrictions.Create(json.arr['package-restrictions'].Link); @@ -434,15 +423,18 @@ procedure TPackageUpdater.update(DB : TFDBConnection); finally json.free; end; - clog('', clmHeader); + FCrawlerLog['run-time'] := DescribePeriodMS(GetTickCount64 - start); except on e : EAbort do begin - Log('Terminate Package Spider - shutting down', MASTER_URL, true) + Log('Terminate Package Spider - shutting down', MASTER_URL, true); + FCrawlerLog['run-time'] := DescribePeriodMS(GetTickCount64 - start); end; on e : Exception do begin - Log('Exception Processing Registry: '+e.Message, MASTER_URL, true) + Log('Exception Processing Registry: '+e.Message, MASTER_URL, true); + FCrawlerLog['run-time'] := DescribePeriodMS(GetTickCount64 - start); + FCrawlerLog['fatal-exception'] := e.Message; end; end; //try @@ -480,18 +472,20 @@ procedure TPackageUpdater.updateTheFeed(url, source, email: String; pr : TPackag channel : TMXmlElement; item : TMXmlElement; i : integer; + clFeed : TJsonObject; + start : UInt64; begin if Logging.shuttingDown then Abort; + clFeed := FCrawlerLog.forceArr['feeds'].addObject; + clFeed['url'] := url; + FFeedErrors := ''; + log('Fetch 
'+url, source, false); + start := GetTickCount64; try - clog('Process '+url, clmHeader); - FCrawlerLog.append('
      '#13#10); - log('Fetch '+url, source, false); - FFeedErrors := ''; - FHasErrors := false; - xml := fetchXml(url); try + clFeed['fetch-time'] := DescribePeriodMS(GetTickCount64 - start); for channel in xml.first.Children do begin if (channel.Name = 'channel') then @@ -501,7 +495,7 @@ procedure TPackageUpdater.updateTheFeed(url, source, email: String; pr : TPackag begin if (item.Name = 'item') then begin - updateItem(url, item, i, pr); + updateItem(url, item, i, pr, clFeed); inc(i); end; end; @@ -510,16 +504,13 @@ procedure TPackageUpdater.updateTheFeed(url, source, email: String; pr : TPackag finally xml.free; end; - if not FHasErrors then - clog('All OK', clmNote); if (FFeedErrors <> '') and (email <> '') then DoSendEmail(email, 'Errors Processing '+url, FFeedErrors); - FCrawlerLog.append('
    '#13#10); except on e : Exception do - begin - clog('Exception: '+e.Message, clmError); - FCrawlerLog.append(''#13#10); + begin + clFeed['exception'] := e.Message; + clFeed['fail-time'] := DescribePeriodMS(GetTickCount64 - start); log('Exception processing feed: '+url+': '+e.Message, source, false); if (email <> '') then DoSendEmail(email, 'Exception Processing '+url, e.Message); @@ -527,33 +518,44 @@ procedure TPackageUpdater.updateTheFeed(url, source, email: String; pr : TPackag end; end; -procedure TPackageUpdater.updateItem(source : String; item: TMXmlElement; i : integer; pr : TPackageRestrictions); +procedure TPackageUpdater.updateItem(source : String; item: TMXmlElement; i : integer; pr : TPackageRestrictions; clFeed : TJsonObject); var guid : String; content : TBytes; date : TFslDateTime; id, url, d, list: String; + clItem : TJsonObject; + start : UInt64; begin if Logging.shuttingDown then Abort; url := '[link not found]'; + clItem := clFeed.forceArr['items'].addObject; if item.element('guid') = nil then begin log('Error processing item from '+source+'#item['+inttostr(i)+']: no guid provided', source, true); - clog('item['+inttostr(i)+']: no guid provided', clmError); + clog(clItem, 'error', 'no guid provided'); exit; end; guid := item.element('guid').Text; + start := GetTickCount64; + clItem['guid'] := guid; + clItem['status'] := '??'; try id := item.element('title').Text; if (item.element('notForPublication') <> nil) and ('true' = item.element('notForPublication').text) then begin - clog(guid+': not for publication', clmError); + clItem['status'] := 'not for publication'; + clog(clItem, 'error', 'not for publication'); exit; end; if pr.isOk(id, source, list) then begin - if (not hasStored(guid)) then + if (hasStored(guid)) then + begin + clItem['status'] := 'Already Processed'; + end + else begin d := item.element('pubDate').Text.toLower.Replace(' ', ' '); if (d.substring(0, 6).contains(',')) then @@ -569,10 +571,12 @@ procedure 
TPackageUpdater.updateItem(source : String; item: TMXmlElement; i : in date := TFslDateTime.fromFormat('dd mmmm yyyy hh:nn:ss', d); end; url := fix(item.element('link').Text); - log('Fetch '+url, source, false); + log('Fetch '+url, source, false); + clItem['url'] := url; content := fetchUrl(url, 'application/tar+gzip'); - store(source, url, guid, date, content, id); - clog(guid+': Fetched '+url, clmNote); + store(source, url, guid, date, content, id, clItem); + clFeed['process-time'] := DescribePeriodMS(GetTickCount64 - start); + clItem['status'] := 'Fetched'; end; end else @@ -580,14 +584,18 @@ procedure TPackageUpdater.updateItem(source : String; item: TMXmlElement; i : in if not (source.contains('simplifier.net')) then begin log('The package '+id+' is not allowed to come from '+source+' (allowed: '+list+')', source, true); - clog(guid+': The package '+id+' is not allowed to come from '+source+' (allowed: '+list+')', clmError); - end; + clog(clItem, 'error', 'The package '+id+' is not allowed to come from '+source+' (allowed: '+list+')'); + clItem['status'] := 'prohibited source'; + end + else + clItem['status'] := 'ignored'; end; except on e : Exception do begin log('Exception processing item: '+guid+' from '+url+': '+e.Message, source, true); - clog(guid+': '+e.Message, clmError); + clItem['status'] := 'Exception'; + clog(clItem, 'error', e.Message); end; end; end; @@ -712,6 +720,14 @@ function genHash(bytes : TBytes) : String; hash.free; end; end; - + +procedure TPackageUpdater.clog(clItem : TJsonObject; level, msg : String); +var + cl : TJsonObject; +begin + cl := clItem.forceArr['messages'].addObject; + cl.vStr['type'] := level; + cl.vStr['message'] := msg; +end; end. 
diff --git a/server/web/packages-broken.html b/server/web/packages-broken.html new file mode 100644 index 000000000..9702e70e7 --- /dev/null +++ b/server/web/packages-broken.html @@ -0,0 +1,23 @@ + + + + + Package Server (FHIR Server [%id%] Version [%ver%]) + [%include head.html%] + + + +[%include top.html%] +

    FHIR Package Server - Broken Dependencies

    + +
    + Filter: + +
    +[%table%] + +[%include bottom.html%] + + + + diff --git a/server/web/packages-home.html b/server/web/packages-home.html index dcf4bc162..5c19eeb8b 100644 --- a/server/web/packages-home.html +++ b/server/web/packages-home.html @@ -34,7 +34,7 @@

    FHIR Package Server

    [%count%] Packages available. [%downloads%] Packages Downloaded. Status = [%status%]. Package History last 30 days

    -Last Package Crawler Log +Last Package Crawler Log | Broken Dependencies

    diff --git a/server/web/packages-search.html b/server/web/packages-search.html index 0af85515b..7e5bdcef2 100644 --- a/server/web/packages-search.html +++ b/server/web/packages-search.html @@ -11,7 +11,7 @@

    FHIR Package Server

    - Last Package Crawler Log + Last Package Crawler Log | Broken Dependencies