Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Version 1.2.0 into main #2

Merged
merged 8 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/main/java/Cosmos/Common/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import Cosmos.Data.WebPage;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

// This class offers common utilities needed for Cosmos
public class Util {
Expand All @@ -26,4 +28,16 @@ public static void weightPages(ArrayList<WebPage> pages, String query) {
}
}

/**
 * Collects every repeated occurrence found in {@code list}.
 *
 * The first time a value is encountered it is only remembered; each later occurrence of
 * that same value is appended to the result in encounter order. A value that appears three
 * times therefore shows up twice in the returned list.
 *
 * @param list the strings to scan
 * @return a new list holding the repeated occurrences; empty when nothing repeats
 */
public static ArrayList<String> getDuplicatesFromList(ArrayList<String> list) {
    final Set<String> seen = new HashSet<>();
    final ArrayList<String> repeats = new ArrayList<>();

    for (final String entry : list) {
        // Set.add returns true only when the value was not present yet.
        if (seen.add(entry)) {
            continue;
        }
        repeats.add(entry);
    }

    return repeats;
}

}
36 changes: 20 additions & 16 deletions src/main/java/Cosmos/Data/Database.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package Cosmos.Data;

import Cosmos.Common.Log;
import Cosmos.Common.Seeds;

import java.sql.*;
Expand All @@ -25,11 +26,14 @@ public Database() {
/**
 * Opens a new JDBC connection to the MySQL server on localhost, port 3306.
 *
 * NOTE(review): the user name and password are hard-coded here; consider moving them to
 * configuration.
 *
 * @return the connection handed back by {@link DriverManager}
 * @throws SQLException if {@link DriverManager} cannot establish the connection
 */
private static Connection connectToDatabase() throws SQLException {
    final String jdbcUrl = "jdbc:mysql://localhost:3306/";
    final String user = "root";
    final String password = "1234";
    return DriverManager.getConnection(jdbcUrl, user, password);
}
/**
 * Closes the connection held in {@code connectionDatabase}.
 *
 * @throws SQLException propagated from {@code connectionDatabase.close()}
 */
public void closeConnection() throws SQLException {
connectionDatabase.close();
}

public static void setup() {
try {
connectionMain = connectToDatabase();
System.out.println("Database connected!");
Log.info("Database connected!");
} catch (SQLException e) {
throw new IllegalStateException("Cannot connect the database!", e);
}
Expand Down Expand Up @@ -147,21 +151,21 @@ public void insertIndicies(String url, ArrayList<String> indicies) throws SQLExc
stmt.execute(sql + ";");
}

public static SearchResult processQuery(String query) {
public SearchResult processQuery(String query) {
final String[] strs = query.split(" ");
ArrayList<String> tokens = new ArrayList<>();
for (String str : strs) {
tokens.add(str);
}
return processTokens(tokens);
}
public static SearchResult processTokens(ArrayList<String> tokens) {
public SearchResult processTokens(ArrayList<String> tokens) {
SearchResult result = new SearchResult();
Instant start = Instant.now();

for (String token : tokens) {
try {
Statement stmt = connectionMain.createStatement();
Statement stmt = connectionDatabase.createStatement();
ResultSet rs = stmt.executeQuery("SELECT url, title FROM webcontent, webindex WHERE webcontent.id = webindex.contentID AND idx = '" + token + "';");

while (rs.next()) {
Expand All @@ -179,10 +183,18 @@ public static SearchResult processTokens(ArrayList<String> tokens) {

return result;
}
/**
 * Looks up the {@code depth} column stored in {@code webcontent} for the given URL.
 *
 * The URL is bound as a statement parameter rather than concatenated into the SQL text, so
 * URLs containing quotes (or crafted input) cannot alter the query. The statement and its
 * result set are closed before the method returns.
 *
 * @param url the page URL to look up
 * @return the value of the first column ({@code depth}) of the first matching row
 * @throws SQLException if the query fails or no row exists for {@code url}
 */
public int getDepthFromURL(String url) throws SQLException {
    final String sql = "SELECT depth FROM webcontent WHERE url = ?";
    try (PreparedStatement stmt = connectionDatabase.prepareStatement(sql)) {
        stmt.setString(1, url);
        try (ResultSet result = stmt.executeQuery()) {
            // Fail with a clear message instead of reading from an empty result set.
            if (!result.next()) {
                throw new SQLException("No webcontent row found for url: " + url);
            }
            return result.getInt(1);
        }
    }
}

public static int getWebContentCount() {
// For use in HomeView
public int getWebContentCount() {
try {
Statement stmt = connectionMain.createStatement();
Statement stmt = connectionDatabase.createStatement();
ResultSet result = stmt.executeQuery("SELECT COUNT(*) FROM webcontent;");

result.next();
Expand All @@ -191,9 +203,9 @@ public static int getWebContentCount() {
throw new RuntimeException(e);
}
}
public static int getWebIndexCount() {
public int getWebIndexCount() {
try {
Statement stmt = connectionMain.createStatement();
Statement stmt = connectionDatabase.createStatement();
ResultSet result = stmt.executeQuery("SELECT COUNT(*) FROM webindex;");

result.next();
Expand All @@ -202,12 +214,4 @@ public static int getWebIndexCount() {
throw new RuntimeException(e);
}
}

/**
 * Looks up the {@code depth} column stored in {@code webcontent} for the given URL.
 *
 * The URL is bound as a statement parameter rather than concatenated into the SQL text, so
 * URLs containing quotes (or crafted input) cannot alter the query. The statement and its
 * result set are closed before the method returns.
 *
 * @param url the page URL to look up
 * @return the value of the first column ({@code depth}) of the first matching row
 * @throws SQLException if the query fails or no row exists for {@code url}
 */
public int getDepthFromURL(String url) throws SQLException {
    final String sql = "SELECT depth FROM webcontent WHERE url = ?";
    try (PreparedStatement stmt = connectionDatabase.prepareStatement(sql)) {
        stmt.setString(1, url);
        try (ResultSet result = stmt.executeQuery()) {
            // Fail with a clear message instead of reading from an empty result set.
            if (!result.next()) {
                throw new SQLException("No webcontent row found for url: " + url);
            }
            return result.getInt(1);
        }
    }
}
}
16 changes: 13 additions & 3 deletions src/main/java/Cosmos/Data/WebCrawler.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package Cosmos.Data;

import Cosmos.Common.Log;
import Cosmos.Common.Util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
Expand All @@ -13,13 +14,19 @@

public class WebCrawler extends Thread {

public static final int MAX_DEPTH = 5;
private Database database;

/**
 * Creates a crawler and sets up its database handle by calling
 * {@code reconnectDatabase()}.
 */
public WebCrawler() {
reconnectDatabase();
}

/**
 * Replaces the crawler's database handle with a freshly constructed one.
 *
 * If a handle already exists, its connection is closed first on a best-effort basis: a
 * failure while closing is ignored so that a new handle is always created.
 */
private void reconnectDatabase() {
    final Database stale = database;
    if (stale != null) {
        try {
            stale.closeConnection();
        } catch (SQLException ignored) {
            // Best effort: the old handle is discarded whether or not it closed cleanly.
        }
    }
    database = new Database();
}

Expand All @@ -42,8 +49,10 @@ private void indexWebPage(String url) {
Log.info("Indexing " + url);

try {

int depth = database.getDepthFromURL(url);
if (depth > MAX_DEPTH) {
return;
}
String html = null;
URLConnection connection = null;
try {
Expand All @@ -68,8 +77,9 @@ private void indexWebPage(String url) {
database.insertBulkURLs(hrefs, depth + 1);
ArrayList<String> tokens = extractTokensFromDoc(doc);
database.deleteIndiciesForURL(url);
database.insertIndicies(url, tokens);

ArrayList<String> indices = Util.getDuplicatesFromList(tokens);
indices.add(doc.title().toLowerCase());
database.insertIndicies(url, indices);

}
catch (SQLException e) {
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/Cosmos/Views/HomeView.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public HomeView() {
Component createContent() {
H1 cosmos = new H1("cosmos");

H6 info = new H6("Found " + Database.getWebContentCount() + " WebPages using " + Database.getWebIndexCount() + " Indicies.");
Database database = new Database();
H6 info = new H6("Found " + database.getWebContentCount() + " WebPages using " + database.getWebIndexCount() + " Indicies.");

HorizontalLayout layout = new HorizontalLayout();
layout.setAlignItems(FlexComponent.Alignment.CENTER);
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/Cosmos/Views/SearchView.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import com.vaadin.flow.router.BeforeEvent;
import com.vaadin.flow.router.HasUrlParameter;
import com.vaadin.flow.router.Route;

import java.sql.SQLException;
import java.util.ArrayList;

@Route("/search")
Expand Down Expand Up @@ -79,7 +81,11 @@ public Component createContent(String query) {
search.addClickShortcut(Key.ENTER);
layout.add(new HorizontalLayout(cosmos, input, search));

SearchResult result = Database.processQuery(query.toLowerCase());
Database database = new Database();
SearchResult result = database.processQuery(query.toLowerCase());
try {
database.closeConnection();
} catch (SQLException ignored) { }
layout.add(new H6("Found " + result.matches.size() + " Entries in " + result.elapsedTime + " Seconds."));

ArrayList<WebPage> pages = new ArrayList<>(result.matches.values());
Expand Down