Skip to content

Commit

Permalink
Added the actual project
Browse files Browse the repository at this point in the history
  • Loading branch information
Chaiavi authored May 8, 2022
1 parent 981bbcf commit 1ec0d6d
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 0 deletions.
19 changes: 19 additions & 0 deletions 16c-Downloader.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  IntelliJ IDEA module descriptor for this project (flagged as a Maven module via the
  isMavenModule attribute below). It declares the Java 8 language level, the source /
  resource / test folders, the compiler output folders under target/, and the libraries
  on the module classpath.
  NOTE(review): the "Maven: ..." library entries look like they are derived from pom.xml
  by the IDE (logback-core presumably arrives transitively via logback-classic) - confirm
  before editing this file by hand.
-->
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<!-- Compiler output locations for production and test classes. -->
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<!-- Source roots; the target folder is excluded from the module content. -->
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<!-- Project-level libraries on the module classpath. -->
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.14.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:2.0.0-alpha7" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.3.0-alpha14" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.3.0-alpha14" level="project" />
</component>
</module>
65 changes: 65 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the 16c archive downloader.
  Produces the regular jar plus an executable "jar-with-dependencies" whose manifest
  points at org.chaiware.download16c.Download16c.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.chaiware</groupId>
    <artifactId>16c-Archive-Downloader</artifactId>
    <version>0.9</version>
    <packaging>jar</packaging>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Pin the source encoding so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- HTML fetching and parsing of the GitHub listing pages. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>

        <!-- Logging API used by the application code. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>2.0.0-alpha7</version>
        </dependency>

        <!-- Logging backend; configured by src/main/resources/logback.xml. -->
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>1.3.0-alpha14</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <!-- Explicit plugin version: without it Maven warns and the build is not reproducible. -->
                <version>3.3.0</version>
                <executions>
                    <execution>
                        <!-- Build the fat jar as part of "mvn package". -->
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                        <configuration>
                            <archive>
                                <manifest>
                                    <mainClass>org.chaiware.download16c.Download16c</mainClass>
                                </manifest>
                            </archive>
                            <descriptorRefs>
                                <descriptorRef>jar-with-dependencies</descriptorRef>
                            </descriptorRefs>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
105 changes: 105 additions & 0 deletions src/main/java/org/chaiware/download16c/Download16c.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package org.chaiware.download16c;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.time.Duration;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;

/**
 * Command-line tool that downloads the files linked from the year folders of the
 * sixteencolors-archive GitHub repository into local per-year directories.
 *
 * <p>The tool scrapes the repository's HTML listing: every link on the root page whose
 * title parses as an integer is treated as a year folder, and every link inside a year
 * page is downloaded from raw.githubusercontent.com into {@code ./<year>/<fileName>}.
 * Files that already exist locally are skipped, which makes the tool re-runnable.
 * A summary (downloaded / skipped / failures per year / duration) is logged at the end.
 */
public class Download16c {
    private static final Logger log = LoggerFactory.getLogger(Download16c.class);

    /** Root page of the archive repository that lists the year folders. */
    private static final String ARCHIVE_URL = "https://github.com/sixteencolors/sixteencolors-archive";
    /** CSS class attribute value GitHub puts on the file/folder links of a listing page. */
    private static final String LISTING_LINK_CLASS = "js-navigation-open Link--primary";
    /** Pause taken after fetching the root page, in milliseconds. */
    private static final long INITIAL_PAUSE_MILLIS = 50 * 1000L;
    /** Size of the copy buffer used while downloading a file. */
    private static final int BUFFER_SIZE = 8192;

    private static int downloaded = 0;
    private static int skipped = 0;
    private static final Map<Integer, Integer> yearToFailedDownloads = new HashMap<>();

    /**
     * Entry point: walks all year folders of the archive and downloads the missing files.
     *
     * @param args ignored
     * @throws IOException if the root page or one of the year pages cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        Instant startTime = Instant.now();
        Document pageContent = Jsoup.connect(ARCHIVE_URL).get();
        log.info("PageTitle: {}", pageContent.title());
        Elements yearLinks = pageContent.body().getElementsByClass(LISTING_LINK_CLASS);

        // NOTE(review): the reason for this pause is not evident from the code
        // (presumably throttling against GitHub) - confirm before changing or removing it.
        try {
            Thread.sleep(INITIAL_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }

        // Browsing by year
        for (Element currentYear : yearLinks) {
            String year = currentYear.attr("title");
            int yearNumber;
            try {
                yearNumber = Integer.parseInt(year);
            } catch (NumberFormatException nfe) {
                log.warn("Skipping: {} as it seems not to be a valid year folder", year);
                continue;
            }

            String yearUrl = "https://github.com" + currentYear.attr("href");
            createFolder(year);
            log.info("\n\n################ {} ################", year);

            Document yearPage = Jsoup.connect(yearUrl).get();
            Elements packsLinks = yearPage.body().getElementsByClass(LISTING_LINK_CLASS);
            for (Element currentPack : packsLinks) {
                // The listing links point at the HTML "blob" view; dropping "blob/" and switching
                // the host yields the raw file URL.
                String packUrl = "https://raw.githubusercontent.com" + currentPack.attr("href").replace("blob/", "");
                String[] urlParts = packUrl.split("/");
                String fileName = urlParts[urlParts.length - 1];
                String targetFilename = "./" + year + "/" + fileName;

                if (new File(targetFilename).exists()) {
                    log.warn("File: {} exists, skipping...", targetFilename);
                    skipped++;
                } else if (downloadFile(packUrl, targetFilename, yearNumber)) {
                    // Only successful downloads are counted; failures are tracked per year.
                    downloaded++;
                }
            }
        }

        generateReport(startTime, Instant.now());
    }

    /**
     * Creates the directory {@code ./<folderName>} (including parents) if it does not exist.
     * A failure is logged but not fatal; the downloads into that folder will then fail
     * individually and be counted as failed.
     */
    private static void createFolder(String folderName) {
        File theDir = new File("./" + folderName);
        if (!theDir.exists() && !theDir.mkdirs()) {
            log.error("Could not create folder: {}", theDir.getPath());
        }
    }

    /**
     * Downloads {@code urlStr} into the file {@code dest}.
     *
     * <p>On failure the error is logged, the failure counter of {@code year} is incremented and
     * any partially written file is removed, so that a later run does not mistake it for a
     * completed download and skip it.
     *
     * @return {@code true} if the file was downloaded completely, {@code false} otherwise
     */
    private static boolean downloadFile(String urlStr, String dest, int year) {
        try (BufferedInputStream in = new BufferedInputStream(new URL(urlStr).openStream());
             FileOutputStream fileOutputStream = new FileOutputStream(dest)) {
            byte[] dataBuffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = in.read(dataBuffer, 0, dataBuffer.length)) != -1) {
                fileOutputStream.write(dataBuffer, 0, bytesRead);
            }

            log.info("File: {} Downloaded Successfully", urlStr);
            return true;
        } catch (IOException e) {
            String msg = "\n\nException while downloading: " + urlStr;
            log.error(msg, e);
            yearToFailedDownloads.merge(year, 1, Integer::sum);

            // Both streams are closed at this point, so the incomplete file can be removed.
            File partial = new File(dest);
            if (partial.exists() && !partial.delete()) {
                log.warn("Could not remove incomplete file: {}", dest);
            }
            return false;
        }
    }

    /** Logs the run summary: counters, failed downloads per year and the elapsed time. */
    private static void generateReport(Instant begin, Instant timeNow) {
        log.info("\n\n################# REPORT #################");
        log.info("Downloaded: {}", downloaded);
        log.info("Skipped: {}", skipped);

        for (Map.Entry<Integer, Integer> entry : yearToFailedDownloads.entrySet()) {
            log.info("Year: {}, FailedDownloads: {}", entry.getKey(), entry.getValue());
        }

        log.info("Duration: {}", formatDuration(Duration.between(begin, timeNow)));
    }

    /** Formats a duration as zero-padded {@code HH:mm:ss} (hours are not capped at 24). */
    private static String formatDuration(Duration duration) {
        long totalSeconds = duration.getSeconds();
        long hours = totalSeconds / 3600;
        long minutes = (totalSeconds % 3600) / 60;
        long seconds = totalSeconds % 60;
        return String.format("%02d:%02d:%02d", hours, minutes, seconds);
    }
}
32 changes: 32 additions & 0 deletions src/main/resources/logback.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<!--
  Logback configuration: every event at DEBUG or above is sent to three appenders -
  the console, a full log file and a second file restricted to WARN and above.
  All three use the same "time level message" line pattern.
-->
<configuration>
<!-- Console output. -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} %-5level %msg%n</pattern>
</encoder>
</appender>

<!-- Complete log of the run; append is disabled, so the file does not accumulate across runs. -->
<appender name="FILE-ALL" class="ch.qos.logback.core.FileAppender">
<file>16c-downloader-ALL.log</file>
<append>false</append>
<encoder>
<pattern>%d{HH:mm:ss.SSS} %-5level %msg%n</pattern>
</encoder>
</appender>

<!-- Problems-only log. Despite the appender name, the threshold is WARN, so warnings are written here too. -->
<appender name="FILE-ERROR" class="ch.qos.logback.core.FileAppender">
<file>16c-downloader-ERROR.log</file>
<append>false</append>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>WARN</level>
</filter>
<encoder>
<pattern>%d{HH:mm:ss.SSS} %-5level %msg%n</pattern>
</encoder>
</appender>

<!-- Root logger: attaches all three appenders. -->
<root level="debug">
<appender-ref ref="STDOUT" />
<appender-ref ref="FILE-ALL" />
<appender-ref ref="FILE-ERROR" />
</root>
</configuration>

0 comments on commit 1ec0d6d

Please sign in to comment.