Skip to content

Commit

Permalink
Merge pull request #76 from CogitoNTNU/tango-scraper
Browse files Browse the repository at this point in the history
Tango scraper
  • Loading branch information
Knolaisen authored Apr 4, 2024
2 parents 888ad33 + 070e299 commit bafc3f1
Show file tree
Hide file tree
Showing 29 changed files with 1,763 additions and 1 deletion.
17 changes: 17 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,23 @@ WORKDIR /code
COPY requirements.txt /code/
RUN pip install -r requirements.txt

# Install Tesseract OCR using apt-get
RUN apt-get update && \
apt-get install -y \
build-essential \
python3-dev \
python3-setuptools \
libgl1-mesa-glx \
tesseract-ocr \
tesseract-ocr-nor \
make \
gcc \
&& python3 -m pip install -r requirements.txt \
&& apt-get remove -y --purge make gcc build-essential \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*


# Then copy the rest of the code
COPY . /code/

Expand Down
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
253 changes: 253 additions & 0 deletions backend/flashcards/text_scraper/image_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
from abc import ABC
from PIL import Image
import cv2
from matplotlib import pyplot as plt
import numpy as np





class Filter(ABC):
"""Filter is an abstract class that defines the interface for all filters
Args:
ABC (_type_): _description_
"""

def __call__(self, image):
pass


class Invert_image(Filter):
"""Invert the image, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
return cv2.bitwise_not(image)


class Grayscale(Filter):
"""Grayscale the image, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

class Binarize(Filter):
"""Binarize the image, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
try: # if the image is already grayscale, this will throw an error
filter: Filter = Grayscale()
image = filter(image)
except:
pass

thresh, im_bw = cv2.threshold(image, 200, 230, cv2.THRESH_BINARY) # must calibrate these values
return im_bw

class Remove_noise(Filter):
"""Remove noice from the image, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
kernel = np.ones((1,1), np.uint8)
image = cv2.dilate(image, kernel, iterations=1)
kernel = np.ones((1,1), np.uint8) # must calibrate these values
image = cv2.erode(image, kernel, iterations=1)
image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
image = cv2.medianBlur(image, 3)
return image


class Thin_font(Filter):
"""Make text thinner, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
image = cv2.bitwise_not(image)
kernel = np.ones((2,2), np.uint8)
image = cv2.erode(image, kernel, iterations=1)
image = cv2.bitwise_not(image)
return image


class Thick_font(Filter):
"""makes text bold, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
image = cv2.bitwise_not(image)
kernel = np.ones((2,2), np.uint8)
image = cv2.dilate(image, kernel, iterations=1)
image = cv2.bitwise_not(image)
return image


class Remove_borders(Filter):
"""Remove borders, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
contours, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours_sorted = sorted(contours, key=lambda x:cv2.contourArea(x))
largest_bounding_box = contours_sorted[-1]
x,y,w,h = cv2.boundingRect(largest_bounding_box)
crop = image[y:y+h, x:x+w]
return crop

class Add_borders(Filter):
"""add boarders, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
color = [255, 255, 255]
top, bottom, left, right = [150]*4
image_with_border = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
return image_with_border



class Deskew(Filter):
"""rotates the image in necessary, callable
Args:
image
Returns:
image
"""
def __call__(self, image):
angle = self.getSkewAngle(image)
image = self.rotateImage(image, -1.0 * angle)
return image


# Rotation and deskewing
# remove border before using this
#https://becominghuman.ai/how-to-automatically-deskew-straighten-a-text-image-using-opencv-a0c30aed83df

def getSkewAngle(self, cvImage) -> float:
# Prep image, copy, convert to gray scale, blur, and threshold
newImage = cvImage.copy()
if len(newImage.shape) == 3 and newImage.shape[2] == 3: # Check if image is color, i might remove this later
gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
else:
gray = newImage.copy()
blur = cv2.GaussianBlur(gray, (9, 9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Apply dilate to merge text into meaningful lines/paragraphs.
# Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
# But use smaller kernel on Y axis to separate between different blocks of text
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find all contours
contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key = cv2.contourArea, reverse = True)
for c in contours:
rect = cv2.boundingRect(c)
x,y,w,h = rect
cv2.rectangle(newImage,(x,y),(x+w,y+h),(0,255,0),2)

# Find largest contour and surround in min area box
largestContour = contours[0]
#print (len(contours))
minAreaRect = cv2.minAreaRect(largestContour)
cv2.imwrite("temp/boxes.jpg", newImage)
# Determine the angle. Convert it to the value that was originally used to obtain skewed image
angle = minAreaRect[-1]
#print(angle)
if angle < -45:
angle = 90 + angle
return -1.0 * angle

# Rotate the image around its center
def rotateImage(self, cvImage, angle: float):
newImage = cvImage.copy()
(h, w) = newImage.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
newImage = cv2.warpAffine(newImage, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
return newImage


#https://stackoverflow.com/questions/28816046/
#displaying-different-images-with-actual-size-in-matplotlib-subplot

class Display(Filter):
"""Displays the image, callable
Args:
image
Returns:
image
"""
def __call__(self, im_data):

dpi = 80

height, width = im_data.shape[:2]

# What size does the figure need to be in inches to fit the image?
figsize = width / float(dpi), height / float(dpi)

# Create a figure of the right size with one axes that takes up the full figure
fig = plt.figure(figsize=figsize)
ax = fig.add_axes([0, 0, 1, 1])

# Hide spines, ticks, etc.
ax.axis('off')

# Display the image.
ax.imshow(im_data, cmap='gray')

plt.show()


if __name__=="__main__":
image_file = "TutorAI/backend/flashcards/text_scraper/assets/page_01_rotated.jpg"
image = cv2.imread(image_file)


filter = Deskew()
image = filter(image)

filter = Invert_image()
image = filter(image)

Display()(image)
Empty file.
Loading

0 comments on commit bafc3f1

Please sign in to comment.