diff --git a/crawler4j-examples/deadlinksniffer/README.adoc b/crawler4j-examples/deadlinksniffer/README.adoc
new file mode 100644
index 000000000..d96d6083a
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/README.adoc
@@ -0,0 +1,45 @@
+= Crawler4j Dead Link Sniffer
+
+This application scans a web page for dead links.
+
+== Compiling
+
+The whole application can be built using Maven:
+
+----
+$> mvn clean install
+----
+
+This also bundles an executable application in `crawler4j-examples/deadlinksniffer/target/appassembler`.
+
+== Usage
+
+==== Getting more help
+To print a description of all parameters:
+----
+$> ./bin/DeadLinkSniffer -?
+----
+
+==== Scanning a web page for dead links
+The following example scans a sample page for dead links.
+It crawls all sub pages which are reachable from the given `seed` (`-s`) pages.
+----
+$> ./bin/DeadLinkSniffer -s=http://mypage.org
+----
+
+You can also define which URLs should be crawled via a list of regular expressions given with the `-u` parameter.
+To define multiple rules, simply add multiple `-u` parameters.
+
+----
+$> ./bin/DeadLinkSniffer -s=http://mypage.org -u="https://.*mypage.org.*"
+----
+
+==== Output
+
+By default the output files are written to `./crawl`.
+The output directory can be specified with the `-o` parameter.
+
+The output directory contains a file `brokenPages.csv` which lists all broken links.
+The first column is the HTTP status, e.g. 404 for 'not found'.
+The second column is the URL of the resource which is missing.
+The third column is the HTML page on which the dead link was found.
\ No newline at end of file
diff --git a/crawler4j-examples/deadlinksniffer/pom.xml b/crawler4j-examples/deadlinksniffer/pom.xml
new file mode 100644
index 000000000..2245bdcaa
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>crawler4j-parent</artifactId>
+        <groupId>edu.uci.ics</groupId>
+        <version>4.5.0-SNAPSHOT</version>
+        <relativePath>../../pom.xml</relativePath>
+    </parent>
+    <artifactId>crawler4j-deadlinksniffer</artifactId>
+
+    <name>find dead links on a web page</name>
+    <url>https://github.com/yasserg/crawler4j</url>
+
+    <dependencies>
+        <dependency>
+            <groupId>edu.uci.ics</groupId>
+            <artifactId>crawler4j</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>1.4</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>appassembler-maven-plugin</artifactId>
+                <version>2.0.0</version>
+                <configuration>
+                    <programs>
+                        <program>
+                            <mainClass>edu.uci.ics.crawler4j.deadlinksniffer.DeadLinkCrawlController</mainClass>
+                            <name>DeadLinkSniffer</name>
+                        </program>
+                    </programs>
+                    <platforms>
+                        <platform>all</platform>
+                    </platforms>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>bundle</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>assemble</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
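The `brokenPages.csv` described in the README is plain comma-separated text with a single header line. A minimal Java sketch for post-processing it; the `crawl` directory and the naive `split` are assumptions, and URLs containing commas would need a real CSV parser:

[source,java]
----
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

public class BrokenPagesReport {
    public static void main(String[] args) throws IOException {
        // default output location, see the -o parameter
        List<String> lines = Files.readAllLines(Paths.get("crawl", "brokenPages.csv"));
        lines.stream()
             .skip(1)                              // skip the "status, url, parent_url" header
             .map(line -> line.split(",\\s*", 3))  // status, missing resource, referring page
             .forEach(cols -> System.out.printf("%s -> %s (linked from %s)%n",
                     cols[0], cols[1], cols[2]));
    }
}
----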
diff --git a/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlConfig.java b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlConfig.java
new file mode 100644
index 000000000..b31eac886
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlConfig.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.crawler4j.deadlinksniffer;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import edu.uci.ics.crawler4j.crawler.CrawlConfig;
+
+/**
+ * @author Mark Struberg
+ */
+public class DeadLinkCrawlConfig extends CrawlConfig {
+    private List<Pattern> urlPatterns = new ArrayList<>();
+    private volatile DeadLinkCrawlerStore crawlerStore;
+    private List<Pattern> excludePatterns = new ArrayList<>();
+
+    public List<Pattern> getUrlPatterns() {
+        return urlPatterns;
+    }
+
+    public List<Pattern> getExcludePatterns() {
+        return excludePatterns;
+    }
+
+    /**
+     * Add a regular expression for URLs which should be followed
+     * by the crawler.
+     */
+    public void addUrlPattern(String urlPattern) {
+        this.urlPatterns.add(Pattern.compile(urlPattern));
+    }
+
+    /**
+     * Add a regular expression for URLs which should be excluded from scanning.
+     * This is effectively a stop criterion and gets evaluated
+     * after all the patterns added via {@link #addUrlPattern(String)}.
+     */
+    public void addExcludePattern(String excludePattern) {
+        this.excludePatterns.add(Pattern.compile(excludePattern));
+    }
+
+    public DeadLinkCrawlerStore getCrawlerStore() {
+        if (crawlerStore == null) {
+            synchronized (this) {
+                if (crawlerStore == null) {
+                    crawlerStore = new DeadLinkCrawlerStore(this);
+                }
+            }
+        }
+
+        return crawlerStore;
+    }
+}
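The include patterns are consulted first and the exclude patterns act as a veto afterwards (see `DeadLinkCrawler#shouldVisit` further down). A self-contained sketch of that evaluation order using plain `java.util.regex`; the concrete pattern strings and URLs are made up for illustration:

[source,java]
----
import java.util.List;
import java.util.regex.Pattern;

public class PatternCheck {
    public static void main(String[] args) {
        List<Pattern> urlPatterns = List.of(Pattern.compile("https?://.*mypage\\.org.*"));
        List<Pattern> excludePatterns = List.of(Pattern.compile(".*/archive/.*"));

        String[] candidates = {
            "https://www.mypage.org/docs/index.html",   // matches an include pattern -> crawled
            "https://www.mypage.org/archive/2001.html", // matches an exclude pattern -> skipped
            "https://other.example.com/"                // matches nothing -> skipped
        };

        for (String href : candidates) {
            boolean included = urlPatterns.stream().anyMatch(p -> p.matcher(href).matches());
            boolean excluded = excludePatterns.stream().anyMatch(p -> p.matcher(href).matches());
            System.out.println(href + " -> " + (included && !excluded ? "crawl" : "skip"));
        }
    }
}
----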
diff --git a/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlController.java b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlController.java
new file mode 100644
index 000000000..c79401086
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlController.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.crawler4j.deadlinksniffer;
+
+import edu.uci.ics.crawler4j.crawler.CrawlController;
+import edu.uci.ics.crawler4j.fetcher.PageFetcher;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scans the given web pages (seeds) for dead links.
+ *
+ * @author Yasser Ganjisaffar
+ * @author Mark Struberg
+ */
+public class DeadLinkCrawlController {
+    private static final Logger logger = LoggerFactory.getLogger(DeadLinkCrawlController.class);
+
+    public static void main(String[] args) throws Exception {
+
+        Options options = new Options();
+
+        options.addRequiredOption("s", "seed", true,
+            "Seed page where the crawling should get started from.");
+
+        options.addOption("u", "url", true,
+            "Regular expressions for page URLs which should get crawled. "
+            + "If not given, the seed URLs act as the starting patterns.");
+
+        options.addOption("x", "exclude", true,
+            "Regular expressions for page URLs which should NOT get crawled.");
+
+        options.addOption("?", "help", false,
+            "Print detailed information about the usage.");
+
+        options.addOption("t", "threads", true,
+            "Number of threads to use for crawling. Defaults to 1.");
+
+        options.addOption("o", "outDir", true,
+            "Output directory to store the downloaded pages and information. Defaults to ./crawl");
+
+        options.addOption("d", "delay", true,
+            "Time delay between requests in ms. Defaults to 1000 (1 second).");
+
+        options.addOption("m", "maxdepth", true,
+            "Maximum depth of crawling. Defaults to 3.");
+
+        options.addOption("p", "pages", true,
+            "Maximum number of pages to fetch. Defaults to 2000.");
+
+        CommandLine cmd = null;
+        try {
+            CommandLineParser parser = new DefaultParser();
+            cmd = parser.parse(options, args);
+        }
+        catch (ParseException pe) {
+            logger.info(pe.getMessage());
+            printHelpAndExit(options);
+        }
+
+        if (cmd.hasOption("?")) {
+            printHelpAndExit(options);
+        }
+
+        DeadLinkCrawlConfig config = new DeadLinkCrawlConfig();
+
+        /*
+         * crawlStorageFolder is a folder where intermediate crawl data is
+         * stored.
+         */
+        String crawlStorageFolder =
+            cmd.hasOption("o")
+                ? cmd.getOptionValue("o")
+                : "crawl";
+        config.setCrawlStorageFolder(crawlStorageFolder);
+
+
+        /*
+         * Be polite: make sure that we don't send more than 1 request per
+         * second (1000 milliseconds between requests).
+         */
+        int delay =
+            cmd.hasOption("d")
+                ? Integer.parseInt(cmd.getOptionValue("d"))
+                : 1000;
+        config.setPolitenessDelay(delay);
+
+        /*
+         * You can set the maximum crawl depth here. The default value is -1 for
+         * unlimited depth.
+         */
+        int maxDepth =
+            cmd.hasOption("m")
+                ? Integer.parseInt(cmd.getOptionValue("m"))
+                : 3;
+        config.setMaxDepthOfCrawling(maxDepth);
+
+        /*
+         * You can set the maximum number of pages to crawl. The default value
+         * is -1 for an unlimited number of pages.
+         */
+        int pages =
+            cmd.hasOption("p")
+                ? Integer.parseInt(cmd.getOptionValue("p"))
+                : 2000;
+        config.setMaxPagesToFetch(pages);
+
+
+        /*
+         * numberOfCrawlers is the number of concurrent threads that should
+         * be initiated for crawling.
+         */
+        int numberOfCrawlers =
+            cmd.hasOption("t")
+                ? Integer.parseInt(cmd.getOptionValue("t"))
+                : 1;
+
+        if (cmd.hasOption("u")) {
+            String[] urlPatterns = cmd.getOptionValues("u");
+
+            for (String urlPattern : urlPatterns) {
+                config.addUrlPattern(urlPattern);
+            }
+        }
+
+        if (cmd.hasOption("x")) {
+            String[] urlPatterns = cmd.getOptionValues("x");
+
+            for (String urlPattern : urlPatterns) {
+                config.addExcludePattern(urlPattern);
+            }
+        }
+
+        /*
+         * Do you want crawler4j to also crawl binary data?
+         * For example the contents of PDFs, or the metadata of images etc.
+         */
+        config.setIncludeBinaryContentInCrawling(false);
+
+        /*
+         * Do you need to set a proxy? If so, you can use:
+         * config.setProxyHost("proxyserver.example.com");
+         * config.setProxyPort(8080);
+         *
+         * If your proxy also needs authentication:
+         * config.setProxyUsername(username); config.setProxyPassword(password);
+         */
+
+        /*
+         * This config parameter can be used to set your crawl to be resumable
+         * (meaning that you can resume the crawl from a previously
+         * interrupted/crashed crawl). Note: if you enable the resuming feature and
+         * want to start a fresh crawl, you need to delete the contents of
+         * rootFolder manually.
+         */
+        config.setResumableCrawling(false);
+
+        /*
+         * Yes, we are going to follow HTTP 301 redirects.
+         */
+        config.setFollowRedirects(true);
+
+
+        /*
+         * Instantiate the controller for this crawl.
+         */
+        PageFetcher pageFetcher = new PageFetcher(config);
+        RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
+        RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
+        CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
+
+        /*
+         * For each crawl, you need to add some seed URLs. These are the first
+         * URLs that are fetched; the crawler then starts following the links
+         * found in these pages.
+         */
+        boolean addSeedsAsUrls = config.getUrlPatterns().isEmpty();
+        String[] seeds = cmd.getOptionValues("s");
+        for (String seed : seeds) {
+            controller.addSeed(seed);
+            if (addSeedsAsUrls) {
+                config.addUrlPattern("^" + seed + ".*");
+            }
+        }
+
+
+        /*
+         * Start the crawl. This is a blocking operation, meaning that your code
+         * will only reach the line after this once crawling is finished.
+         */
+        controller.start(DeadLinkCrawler.class, numberOfCrawlers);
+    }
+
+    private static void printHelpAndExit(Options options) {
+        HelpFormatter hf = new HelpFormatter();
+        hf.printHelp("\n\tDeadLinkSniffer -? - for help"
+            + "\n\tDeadLinkSniffer -s=http://mypage.org - for scanning this page"
+            + "\n\tDeadLinkSniffer -s=http://mypage.org -u=\"https://.*mypage.org.*\" - for scanning this page with all subdomains, etc."
+            , options);
+
+        System.exit(-1);
+    }
+
+
+}
\ No newline at end of file
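The `main` method above is CLI-driven; for embedding the sniffer in another application, the essential wiring boils down to the sketch below. It only uses calls that appear in this patch; the seed URL, storage folder, URL pattern, and thread count are placeholder values:

[source,java]
----
package edu.uci.ics.crawler4j.deadlinksniffer;

import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;

public class EmbeddedDeadLinkSniffer {
    public static void main(String[] args) throws Exception {
        DeadLinkCrawlConfig config = new DeadLinkCrawlConfig();
        config.setCrawlStorageFolder("crawl");             // placeholder output directory
        config.setPolitenessDelay(1000);
        config.setMaxDepthOfCrawling(3);
        config.setMaxPagesToFetch(2000);
        config.addUrlPattern("^http://mypage\\.org.*");    // placeholder seed domain

        PageFetcher pageFetcher = new PageFetcher(config);
        RobotstxtServer robotstxtServer = new RobotstxtServer(new RobotstxtConfig(), pageFetcher);
        CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);

        controller.addSeed("http://mypage.org");
        controller.start(DeadLinkCrawler.class, 1);        // blocks until the crawl finishes
    }
}
----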
diff --git a/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawler.java b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawler.java
new file mode 100644
index 000000000..9c25127f5
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawler.java
@@ -0,0 +1,210 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.crawler4j.deadlinksniffer;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Pattern;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+import edu.uci.ics.crawler4j.crawler.WebCrawler;
+import edu.uci.ics.crawler4j.parser.HtmlParseData;
+import edu.uci.ics.crawler4j.parser.ImageData;
+import edu.uci.ics.crawler4j.url.WebURL;
+import org.apache.http.Header;
+import org.apache.http.HttpStatus;
+
+/**
+ * TODO: Currently not thread safe!
+ *
+ * @author Yasser Ganjisaffar
+ * @author Mark Struberg
+ */
+public class DeadLinkCrawler extends WebCrawler {
+
+    private static final Pattern IMAGE_EXTENSIONS = Pattern.compile(".*\\.(bmp|gif|jpg|png|jpeg|css|js|pdf)$");
+
+
+    private AtomicInteger maxVisits = new AtomicInteger(0);
+
+    private File rootFolder;
+
+    /**
+     * Contains all broken URLs detected in {@link #handlePageStatusCode(WebURL, int, String)}.
+     */
+    private ConcurrentMap<String, Integer> brokenUrls = new ConcurrentHashMap<>();
+
+
+    /**
+     * You should implement this function to specify whether the given URL
+     * should be crawled or not (based on your crawling logic).
+     */
+    @Override
+    public boolean shouldVisit(Page referringPage, WebURL url) {
+        String href = url.getURL().toLowerCase();
+        // Ignore the url if it has an extension that matches our defined set of image extensions.
+        if (isImageLink(href)) {
+            return false;
+        }
+
+        // Only accept the url if it is in the requested url domains.
+        if (!((DeadLinkCrawlConfig) getMyController().getConfig()).getUrlPatterns()
+                .stream()
+                .anyMatch(pattern -> pattern.matcher(href).matches())) {
+            return false;
+        }
+
+        // and also only if the url is not explicitly excluded
+        if (((DeadLinkCrawlConfig) getMyController().getConfig()).getExcludePatterns()
+                .stream()
+                .anyMatch(pattern -> pattern.matcher(href).matches())) {
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    protected boolean shouldFollowLinksIn(WebURL url) {
+        int visits = maxVisits.incrementAndGet();
+        logger.info("Number of visits so far: {}", visits);
+        return true;
+    }
+
+    @Override
+    protected void handlePageStatusCode(WebURL webUrl, int statusCode, String statusDescription) {
+        if (statusCode != HttpStatus.SC_OK &&
+            statusCode != HttpStatus.SC_TEMPORARY_REDIRECT &&
+            statusCode != HttpStatus.SC_MOVED_TEMPORARILY &&
+            statusCode != HttpStatus.SC_MOVED_PERMANENTLY) {
+            logger.info("\n\n BROKEN PAGE status {} {} \n\n", statusCode, webUrl.getURL());
+            brokenUrls.put(webUrl.getURL(), statusCode);
+            getConfig().getCrawlerStore().storePageStatus(statusCode, webUrl);
+        }
+    }
+
+    /**
+     * This function is called when a page is fetched and ready to be processed
+     * by your program.
+     */
+    @Override
+    public void visit(Page page) {
+        int docid = page.getWebURL().getDocid();
+        String url = page.getWebURL().getURL();
+        String domain = page.getWebURL().getDomain();
+        String path = page.getWebURL().getPath();
+        String subDomain = page.getWebURL().getSubDomain();
+        String parentUrl = page.getWebURL().getParentUrl();
+        String anchor = page.getWebURL().getAnchor();
+
+        logger.debug("Docid: {}", docid);
+        logger.info("URL: {}", url);
+        logger.debug("Domain: '{}'", domain);
+        logger.debug("Sub-domain: '{}'", subDomain);
+        logger.debug("Path: '{}'", path);
+        logger.debug("Parent page: {}", parentUrl);
+        logger.debug("Anchor text: {}", anchor);
+
+        if (page.getParseData() instanceof HtmlParseData) {
+            HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
+            String text = htmlParseData.getText();
+            String html = htmlParseData.getHtml();
+            Set<WebURL> links = htmlParseData.getOutgoingUrls();
+
+            logger.debug("Text length: {}", text.length());
+            logger.debug("Html length: {}", html.length());
+            logger.debug("Number of outgoing links: {}", links.size());
+
+            for (WebURL link : links) {
+                if (brokenUrls.containsKey(link.getURL())) {
+                    getConfig().getCrawlerStore().storePageStatus(brokenUrls.get(link.getURL()), link);
+                }
+            }
+
+            storeHtml(page.getWebURL(), html);
+
+
+            List<ImageData> imageDatas = htmlParseData.getImageData();
+            int imgNr = 0;
+            for (ImageData imageData : imageDatas) {
+                imgNr++;
+                getConfig().getCrawlerStore().storeImageInfo(page, imgNr, imageData);
+            }
+        }
+
+        Header[] responseHeaders = page.getFetchResponseHeaders();
+        if (responseHeaders != null) {
+            logger.debug("Response headers:");
+            for (Header header : responseHeaders) {
+                logger.debug("\t{}: {}", header.getName(), header.getValue());
+            }
+        }
+
+        logger.debug("=============");
+    }
+
+    private void storeHtml(WebURL webURL, String html) {
+        String rootUrl = webURL.getRootUrl();
+        File rootUrlDir = new File(getRootFolder(), rootUrl.replace("/", "_"));
+        if (!rootUrlDir.exists()) {
+            rootUrlDir.mkdir();
+        }
+
+
+        File f = new File(rootUrlDir, webURL.getURL().replace("/", "_"));
+        if (f.exists()) {
+            return;
+        }
+        try (FileWriter fw = new FileWriter(f)) {
+            fw.write(html);
+        }
+        catch (IOException e) {
+            logger.error("could not store file " + f.toString(), e);
+        }
+    }
+
+    @Override
+    public void onBeforeExit() {
+        getConfig().getCrawlerStore().close();
+    }
+
+    private DeadLinkCrawlConfig getConfig() {
+        return (DeadLinkCrawlConfig) getMyController().getConfig();
+    }
+
+    public File getRootFolder() {
+        if (rootFolder == null) {
+            rootFolder = new File(getConfig().getCrawlStorageFolder(), "content");
+            rootFolder.mkdirs();
+        }
+        return rootFolder;
+    }
+
+    private boolean isImageLink(String href) {
+        return IMAGE_EXTENSIONS.matcher(href).matches();
+    }
+
+
+}
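As a quick sanity check of the `IMAGE_EXTENSIONS` filter used by `shouldVisit`, the following sketch classifies a few made-up links the same way the crawler would (note the crawler matches against the lower-cased URL):

[source,java]
----
import java.util.regex.Pattern;

public class ImageFilterCheck {
    // same pattern as in DeadLinkCrawler
    private static final Pattern IMAGE_EXTENSIONS =
            Pattern.compile(".*\\.(bmp|gif|jpg|png|jpeg|css|js|pdf)$");

    public static void main(String[] args) {
        String[] hrefs = {
            "http://mypage.org/logo.PNG",       // filtered out (after lower-casing)
            "http://mypage.org/style.css",      // filtered out
            "http://mypage.org/docs/intro.html" // visited as a page
        };
        for (String href : hrefs) {
            boolean skip = IMAGE_EXTENSIONS.matcher(href.toLowerCase()).matches();
            System.out.println(href + " -> " + (skip ? "skip" : "visit"));
        }
    }
}
----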
diff --git a/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlerStore.java b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlerStore.java
new file mode 100644
index 000000000..59f583d65
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/src/main/java/edu/uci/ics/crawler4j/deadlinksniffer/DeadLinkCrawlerStore.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.crawler4j.deadlinksniffer;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+import edu.uci.ics.crawler4j.parser.ImageData;
+import edu.uci.ics.crawler4j.url.WebURL;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Stores information about erroneous pages to the disk.
+ *
+ * @author Mark Struberg
+ */
+public class DeadLinkCrawlerStore {
+    private static final Logger logger = LoggerFactory.getLogger(DeadLinkCrawlerStore.class);
+
+    private final DeadLinkCrawlConfig config;
+
+    private FileWriter brokenPages;
+    private FileWriter imageWoAlt;
+
+
+    protected DeadLinkCrawlerStore(DeadLinkCrawlConfig config) {
+        this.config = config;
+        try {
+            brokenPages = new FileWriter(new File(config.getCrawlStorageFolder(), "brokenPages.csv"));
+            brokenPages.append("status, url, parent_url\n");
+            brokenPages.flush();
+
+            imageWoAlt = new FileWriter(new File(config.getCrawlStorageFolder(), "imageWoAlt.csv"));
+            imageWoAlt.append("onPage, imageNr, imgSrc, imgLink\n");
+            imageWoAlt.flush();
+        }
+        catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+
+    }
+
+
+
+
+    public synchronized void close() {
+        closeFile(brokenPages, "brokenPages");
+        closeFile(imageWoAlt, "imageWoAlt");
+        brokenPages = null;
+        imageWoAlt = null;
+    }
+
+    private void closeFile(FileWriter fw, String name) {
+        if (fw == null) {
+            return;
+        }
+
+        try {
+            fw.close();
+        }
+        catch (IOException e) {
+            logger.error("problem closing " + name, e);
+        }
+    }
+
+    public synchronized void storePageStatus(int statusCode, WebURL webUrlFail) {
+        try {
+            brokenPages.append("" + statusCode + ", " + webUrlFail.getURL() + ", " + webUrlFail.getParentUrl() + "\n");
+            brokenPages.flush();
+        }
+        catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public synchronized void storeImageInfo(Page page, int imgNr, ImageData imageData) {
+        // log all images with a missing 'alt' attribute
+        if (!imageData.getAttrVals().containsKey("alt") || imageData.getAttrVals().get("alt").isEmpty()) {
+            String url = page.getWebURL().getURL();
+            logger.info("\n\n IMAGE without 'alt' attribute on page {} img: {}", url, imageData.getSrc());
+            try {
+                String src = imageData.getSrc();
+                String imgLink;
+                if (src.startsWith("https://") || src.startsWith("http://")) {
+                    // absolute image
+                    imgLink = src;
+                }
+                else if (src.startsWith("/")) {
+                    // server-root relative image
+                    imgLink = page.getWebURL().getRootUrl() + src;
+                }
+                else {
+                    // relative image
+                    imgLink = page.getWebURL().getRootUrl() + page.getWebURL().getPath() + "/" + src;
+                }
+                imageWoAlt.append(url + ", " + imgNr + ", " + src + ", " + imgLink + "\n");
+                imageWoAlt.flush();
+            }
+            catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+}
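The `storeImageInfo` method resolves an image `src` in three ways: absolute, server-root relative, and relative to the page path. A standalone sketch of that resolution logic; the sample `rootUrl`, `pagePath`, and `src` values are invented:

[source,java]
----
public class ImageLinkResolver {
    /** Mirrors the resolution cases in DeadLinkCrawlerStore#storeImageInfo. */
    static String resolve(String rootUrl, String pagePath, String src) {
        if (src.startsWith("https://") || src.startsWith("http://")) {
            return src;                          // already absolute
        }
        if (src.startsWith("/")) {
            return rootUrl + src;                // server-root relative
        }
        return rootUrl + pagePath + "/" + src;   // relative to the current page path
    }

    public static void main(String[] args) {
        String rootUrl = "http://mypage.org";
        String pagePath = "/docs";
        System.out.println(resolve(rootUrl, pagePath, "http://cdn.example.com/a.png"));
        System.out.println(resolve(rootUrl, pagePath, "/img/logo.png"));
        System.out.println(resolve(rootUrl, pagePath, "header.png"));
    }
}
----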
diff --git a/crawler4j-examples/deadlinksniffer/src/main/resources/logback.xml b/crawler4j-examples/deadlinksniffer/src/main/resources/logback.xml
new file mode 100644
index 000000000..c716bfa50
--- /dev/null
+++ b/crawler4j-examples/deadlinksniffer/src/main/resources/logback.xml
@@ -0,0 +1,15 @@
+<configuration>
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%date{HH:mm:ss} %-5level [%thread] - [%logger{0}]- %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT"/>
+    </root>
+
+</configuration>
diff --git a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
index 67676afcb..1dc428eb5 100644
--- a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
+++ b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlContentHandler.java
@@ -18,6 +18,7 @@ package edu.uci.ics.crawler4j.parser;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -68,6 +69,7 @@ public static Element getElement(String name) {
 
     private final StringBuilder bodyText;
     private final List<ExtractedUrlAnchorPair> outgoingUrls;
+    private final List<ImageData> imageData;
 
     private ExtractedUrlAnchorPair curUrl = null;
     private boolean anchorFlag = false;
@@ -77,6 +79,7 @@ public HtmlContentHandler() {
         isWithinBodyElement = false;
         bodyText = new StringBuilder();
         outgoingUrls = new ArrayList<>();
+        imageData = new ArrayList<>();
     }
 
     @Override
@@ -94,7 +97,19 @@ public void startElement(String uri, String localName, String qName, Attributes
             String imgSrc = attributes.getValue("src");
             if (imgSrc != null) {
                 addToOutgoingUrls(imgSrc, localName);
+
+                Map<String, String> attrVals;
+                if (attributes.getLength() == 0) {
+                    attrVals = Collections.emptyMap();
+                } else {
+                    attrVals = new HashMap<>();
+                    for (int i = 0; i < attributes.getLength(); i++) {
+                        attrVals.put(attributes.getLocalName(i), attributes.getValue(i));
+                    }
+                }
+                imageData.add(new ImageData(imgSrc, attrVals));
             }
+
         } else if ((element == Element.IFRAME) || (element == Element.FRAME) ||
                    (element == Element.EMBED) || (element == Element.SCRIPT)) {
             String src = attributes.getValue("src");
@@ -209,4 +224,8 @@ public String getBaseUrl() {
     public Map<String, String> getMetaTags() {
         return metaTags;
     }
+
+    public List<ImageData> getImageData() {
+        return imageData;
+    }
 }
diff --git a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlParseData.java b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlParseData.java
index cac68fb75..39ffb7f07 100644
--- a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlParseData.java
+++ b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/HtmlParseData.java
@@ -17,6 +17,7 @@
 
 package edu.uci.ics.crawler4j.parser;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
@@ -31,6 +32,7 @@ public class HtmlParseData implements ParseData {
     private Set<WebURL> outgoingUrls;
     private String contentCharset;
+    private List<ImageData> imageData;
 
     public String getHtml() {
         return html;
@@ -78,6 +80,10 @@ public void setOutgoingUrls(Set<WebURL> outgoingUrls) {
         this.outgoingUrls = outgoingUrls;
     }
 
+    public void setImageData(List<ImageData> imageData) {
+        this.imageData = imageData;
+    }
+
     @Override
     public String toString() {
         return text;
@@ -90,4 +96,8 @@ public void setContentCharset(String contentCharset) {
     public String getContentCharset() {
         return contentCharset;
     }
+
+    public List<ImageData> getImageData() {
+        return imageData;
+    }
 }
\ No newline at end of file
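With the new `getImageData()` accessor on `HtmlParseData`, a custom crawler can flag images without an `alt` attribute directly in `visit`. A minimal sketch of such a crawler; the class name `AltTextCrawler` is made up and error handling is omitted:

[source,java]
----
import java.util.List;

import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.parser.ImageData;

public class AltTextCrawler extends WebCrawler {
    @Override
    public void visit(Page page) {
        if (!(page.getParseData() instanceof HtmlParseData)) {
            return;
        }
        List<ImageData> images = ((HtmlParseData) page.getParseData()).getImageData();
        if (images == null) {
            return;  // defensive: no image data collected for this page
        }
        for (ImageData image : images) {
            String alt = image.getAttrVals().get("alt");
            if (alt == null || alt.isEmpty()) {
                // logger is inherited from WebCrawler
                logger.info("image without alt text on {}: {}",
                        page.getWebURL().getURL(), image.getSrc());
            }
        }
    }
}
----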
diff --git a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/ImageData.java b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/ImageData.java
new file mode 100644
index 000000000..14f1859de
--- /dev/null
+++ b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/ImageData.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.crawler4j.parser;
+
+import java.util.Map;
+
+/**
+ * Information about images on a page.
+ * Can be used to e.g. detect images without an 'alt' attribute.
+ *
+ * @author Mark Struberg
+ */
+public class ImageData {
+
+    private final String src;
+    private final Map<String, String> attrVals;
+
+    public ImageData(String src, Map<String, String> attrVals) {
+        this.src = src;
+        this.attrVals = attrVals;
+    }
+
+    public String getSrc() {
+        return src;
+    }
+
+    public Map<String, String> getAttrVals() {
+        return attrVals;
+    }
+}
diff --git a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/TikaHtmlParser.java b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/TikaHtmlParser.java
index 48657bce6..ce9f1ed12 100644
--- a/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/TikaHtmlParser.java
+++ b/crawler4j/src/main/java/edu/uci/ics/crawler4j/parser/TikaHtmlParser.java
@@ -61,6 +61,8 @@ public HtmlParseData parse(Page page, String contextURL) throws ParseException {
         Set<WebURL> outgoingUrls = getOutgoingUrls(contextURL, contentHandler, contentCharset);
         parsedData.setOutgoingUrls(outgoingUrls);
 
+        parsedData.setImageData(contentHandler.getImageData());
+
         try {
             if (page.getContentCharset() == null) {
                 parsedData.setHtml(new String(page.getContentData()));
diff --git a/crawler4j/src/main/java/edu/uci/ics/crawler4j/url/WebURL.java b/crawler4j/src/main/java/edu/uci/ics/crawler4j/url/WebURL.java
index bd64052bd..34e3c750c 100644
--- a/crawler4j/src/main/java/edu/uci/ics/crawler4j/url/WebURL.java
+++ b/crawler4j/src/main/java/edu/uci/ics/crawler4j/url/WebURL.java
@@ -38,6 +38,7 @@ public class WebURL implements Serializable {
     private int docid;
     private int parentDocid;
     private String parentUrl;
+    private String rootUrl;
     private short depth;
     private String domain;
     private String subDomain;
@@ -93,6 +94,8 @@ public void setURL(String url) {
         if (pathEndIdx >= 0) {
             path = path.substring(0, pathEndIdx);
         }
+
+        this.rootUrl = domainEndIdx > 0 ? url.substring(0, domainEndIdx) : url;
     }
 
     /**
@@ -121,6 +124,15 @@ public void setParentUrl(String parentUrl) {
         this.parentUrl = parentUrl;
     }
 
+    /**
+     * The root URL of the page.
+     * E.g. for the URL http://somesub.domain.org/myapp?idx=4 this would be
+     * http://somesub.domain.org/
+     */
+    public String getRootUrl() {
+        return rootUrl;
+    }
+
     /**
      * @return
      *      crawl depth at which this Url is first observed. Seed Urls
diff --git a/pom.xml b/pom.xml
index a5af11dd6..06f79abe7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,7 +50,11 @@
         <module>crawler4j</module>
         <module>crawler4j-examples/crawler4j-examples-base</module>
+
+        <module>crawler4j-examples/deadlinksniffer</module>