# Read from the SHADOWMIRE_IOWORKERS environment variable (default: 2).
IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
# A safety net -- to avoid upstream issues causing too many packages to be
# removed when determining the sync plan.
MAX_DELETION = int(os.environ.get("SHADOWMIRE_MAX_DELETION", "50000"))
# Sometimes PyPI is not consistent -- new packages could not be fetched.
# This option tries to avoid permanently marking that kind of package as
# nonexistent.
IGNORE_THRESHOLD = int(os.environ.get("SHADOWMIRE_IGNORE_THRESHOLD", "1024"))
4547
4648# https://github.com/pypa/bandersnatch/blob/a05af547f8d1958217ef0dc0028890b1839e6116/src/bandersnatch_filter_plugins/prerelease_name.py#L18C1-L23C6
4749PRERELEASE_PATTERNS = (
@@ -115,6 +117,13 @@ def remove(self, key: str) -> None:
115117 cur .execute ("DELETE FROM local WHERE key = ?" , (key ,))
116118 self .conn .commit ()
117119
def remove_invalid(self) -> int:
    """Delete every row of ``local`` whose value is -1.

    The deletion is committed on ``self.conn`` before returning.

    Returns:
        The number of rows removed, as reported by the cursor's
        ``rowcount``.
    """
    cur = self.conn.cursor()
    cur.execute("DELETE FROM local WHERE value = -1")
    # Capture the count right after the DELETE, before committing.
    rowcnt = cur.rowcount
    self.conn.commit()
    return rowcnt
126+
118127 def nuke (self , commit : bool = True ) -> None :
119128 cur = self .conn .cursor ()
120129 cur .execute ("DELETE FROM local" )
@@ -337,6 +346,9 @@ def list_packages_with_serial(self, do_normalize: bool = True) -> dict[str, int]
337346 del ret [key ]
338347 return ret
339348
def changelog_last_serial(self) -> int:
    """Return the value of ``changelog_last_serial()`` from the XML-RPC client.

    Thin wrapper around ``self.xmlrpc_client``; the result is passed through
    unchanged.
    """
    return self.xmlrpc_client.changelog_last_serial()  # type: ignore
351+
340352 def get_package_metadata (self , package_name : str ) -> dict :
341353 req = self .session .get (urljoin (self .host , f"pypi/{ package_name } /json" ))
342354 if req .status_code == 404 :
@@ -828,15 +840,18 @@ def __init__(
828840 ) -> None :
829841 self .pypi = PyPI ()
830842 self .session = create_requests_session ()
843+ self .last_serial : Optional [int ] = None
844+ self .remote_packages : Optional [dict [str , int ]] = None
831845 super ().__init__ (basedir , local_db , sync_packages )
832846
def fetch_remote_versions(self) -> dict[str, int]:
    """Fetch the remote package listing and remember it on the instance.

    Stores the upstream last serial in ``self.last_serial`` and the
    package-to-serial mapping in ``self.remote_packages``, then dumps the
    mapping as JSON to ``remote.json`` under ``self.basedir``.

    Returns:
        The mapping returned by ``self.pypi.list_packages_with_serial()``.
    """
    # The last serial is read before the package listing is requested.
    self.last_serial = self.pypi.changelog_last_serial()
    self.remote_packages = self.pypi.list_packages_with_serial()
    logger.info("Remote has %s packages", len(self.remote_packages))
    with overwrite(self.basedir / "remote.json") as f:
        json.dump(self.remote_packages, f)
    logger.info("File saved to remote.json.")
    return self.remote_packages
840855
841856 def do_update (
842857 self ,
@@ -852,9 +867,31 @@ def do_update(
852867 meta_original = deepcopy (meta )
853868 logger .debug ("%s meta: %s" , package_name , meta )
854869 except PackageNotFoundError :
870+ if (
871+ self .remote_packages is not None
872+ and package_name in self .remote_packages
873+ ):
874+ recorded_serial = self .remote_packages [package_name ]
875+ else :
876+ recorded_serial = None
877+ if (
878+ recorded_serial is not None
879+ and self .last_serial is not None
880+ and abs (recorded_serial - self .last_serial ) < IGNORE_THRESHOLD
881+ ):
882+ logger .warning (
883+ "%s missing from upstream (its serial %s, remote last serial %s), try next time..." ,
884+ package_name ,
885+ recorded_serial ,
886+ self .last_serial ,
887+ )
888+ return None
889+
855890 logger .warning (
856- "%s missing from upstream, remove and ignore in the future." ,
891+ "%s missing from upstream (its serial %s, remote last serial %s) , remove and ignore in the future." ,
857892 package_name ,
893+ recorded_serial ,
894+ self .last_serial ,
858895 )
859896 # try remove it locally, if it does not exist upstream
860897 self .do_remove (package_name , use_db = False )
@@ -1446,5 +1483,13 @@ def list_packages_with_serial(ctx: click.Context) -> None:
14461483 syncer .fetch_remote_versions ()
14471484
14481485
@cli.command(help="Clear invalid package status in local database")
@click.pass_context
def clear_invalid_packages(ctx: click.Context) -> None:
    """CLI command: drop invalid entries from the local database.

    Reads the local database object from ``ctx.obj["local_db"]``, calls its
    ``remove_invalid()`` method, and logs the returned count.
    """
    local_db: LocalVersionKV = ctx.obj["local_db"]
    total = local_db.remove_invalid()
    logger.info("Removed %s invalid status in local database", total)
1492+
1493+
# Script entry point: run the click CLI with an empty context object.
if __name__ == "__main__":
    cli(obj={})
0 commit comments