Skip to content

Commit 70f3f2e

Browse files
committed
Filter meaningless images
1 parent 23fba36 commit 70f3f2e

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

app.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,20 @@
1616
# File-name suffixes of images that carry no information about a document.
# NOTE(review): these look like Wikipedia maintenance/template icons
# (protection padlock, "question book", etc.) -- confirm against the dataset.
# The original check listed '50px-Question_book-new.svg.png' twice; the
# duplicate was dead code and has been dropped.
_MEANINGLESS_IMAGE_SUFFIXES = (
    '20px-Semi-protection-shackle.svg.png',
    '50px-Question_book-new.svg.png',
    '40px-Edit-clear.svg.png',
    '19px-Symbol_support_vote.svg.png',
)


def _is_meaningless_image(url: str) -> bool:
    """Return True if *url* ends with one of the known icon suffixes."""
    # str.endswith accepts a tuple, so one call replaces the if-chain.
    return url.endswith(_MEANINGLESS_IMAGE_SUFFIXES)


@app.get('/search/')
def search(q: str):
    """Return the main-image URL of every search hit, minus known icons.

    Args:
        q: Query string forwarded unchanged to ``engine.search``.

    Returns:
        A list with ``document.main_image`` of each result, in the order the
        engine returned them, excluding entries whose value ends with one of
        ``_MEANINGLESS_IMAGE_SUFFIXES``.
    """
    results = engine.search(q)
    images = (hit.document.main_image for hit in results)
    # assumes main_image is always a str -- TODO confirm it can never be None
    return [image for image in images if not _is_meaningless_image(image)]

wiki_search/dataset/dataset.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import os
88
from nltk.corpus import stopwords
99
from nltk.tokenize import word_tokenize
10+
from torch_geometric.utils import degree
11+
from torch_scatter import scatter
1012
import torch
1113
import ray
1214

@@ -79,6 +81,20 @@ def derive_edges_runner(doc: Document, d2i: Dict[str, int]):
7981
return derive_edges(doc, d2i)
8082

8183

84+
def compute_pr(edge_index, damp: float = 0.85, k: int = 10):
    """Run ``k`` rounds of damped score propagation over a directed graph.

    Presumably a PageRank-style ranking (name and damping factor suggest so).
    Every node starts with score 1. In each round a node splits its current
    score evenly over its outgoing edges, each node sums what it receives,
    and the new score is ``(1 - damp) * old + damp * received``.

    NOTE(review): textbook PageRank uses a constant teleport term instead of
    ``(1 - damp) * old``; the original update rule is kept unchanged here.

    Args:
        edge_index: Integer tensor of shape ``(2, num_edges)``; row 0 holds
            source node ids and row 1 holds destination node ids.
        damp: Weight given to the aggregated incoming score in each round.
        k: Number of propagation rounds.

    Returns:
        A float32 tensor of length ``edge_index.max() + 1`` on the same
        device as ``edge_index``. An empty ``edge_index`` yields an empty
        tensor instead of raising.
    """
    if edge_index.numel() == 0:
        # .max() is undefined on an empty tensor; there are no nodes to score.
        return torch.zeros(
            (0,), dtype=torch.float32, device=edge_index.device)

    src, dst = edge_index[0], edge_index[1]
    num_nodes = int(edge_index.max().item()) + 1

    # Out-degree per node. Every id in `src` has degree >= 1, so the
    # division below never hits zero.
    out_deg = torch.bincount(src, minlength=num_nodes).to(torch.float32)
    # Loop-invariant: the per-edge divisor does not depend on the scores.
    src_out_deg = out_deg[src]

    # Allocate directly on the target device with the target dtype.
    x = torch.ones(num_nodes, dtype=torch.float32, device=edge_index.device)

    for _ in range(k):
        edge_msg = x[src] / src_out_deg
        # Sum incoming messages into a buffer that always has `num_nodes`
        # entries. Letting the aggregation infer its size from `dst` breaks
        # when the highest-numbered node has no incoming edge.
        agg_msg = torch.zeros_like(x).index_add_(0, dst, edge_msg)

        x = (1 - damp) * x + damp * agg_msg

    return x
96+
97+
8298
def ray_reduce(func, xs):
8399
n = len(xs)
84100
if n == 1:

0 commit comments

Comments
 (0)