@@ 7,9 7,10 @@ from imutils.object_detection import non_max_suppression
import numpy as np
from textblob import TextBlob
-TESSCONFIG = "-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\\\'\!\?\ \. --oem 1"
+TESSCONFIG = "-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\\'\!\?\ \. --oem 1"
-class ComicScanner():
+
+class ComicScanner:
def __init__(self) -> None:
self.net = cv2.dnn.readNet("frozen_east_text_detection.pb")
@@ 19,23 20,22 @@ class ComicScanner():
(H, W) = image.shape[:2]
# set the new width and height and then determine the ratio in change
# for both the width and height
- (newW, newH) = (W>>5<<5, H>>5<<5)
+ (newW, newH) = (W >> 5 << 5, H >> 5 << 5)
rW = W / float(newW)
rH = H / float(newH)
-
+
img = cv2.resize(image, (newW, newH))
(H, W) = img.shape[:2]
# define the two output layer names for the EAST detector model that
# we are interested -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
- layerNames = [
- "feature_fusion/Conv_7/Sigmoid",
- "feature_fusion/concat_3"]
+ layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
- blob = cv2.dnn.blobFromImage(img, 1.0, (W, H),
- (123.68, 116.78, 103.94), swapRB=True, crop=False)
+ blob = cv2.dnn.blobFromImage(
+ img, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False
+ )
self.net.setInput(blob)
(scores, geometry) = self.net.forward(layerNames)
# grab the number of rows and columns from the scores volume, then
@@ 55,7 55,7 @@ class ComicScanner():
xData2 = geometry[0, 2, y]
xData3 = geometry[0, 3, y]
anglesData = geometry[0, 4, y]
- # loop over the number of columns
+ # loop over the number of columns
for x in range(0, numCols):
# if our score does not have sufficient probability, ignore it
if scoresData[x] < 0.5:
@@ 70,8 70,8 @@ class ComicScanner():
sin = np.sin(angle)
# use the geometry volume to derive the width and height of
# the bounding box
- h = 1.2*(xData0[x] + xData2[x])
- w = 1.2*(xData1[x] + xData3[x])
+ h = 1.2 * (xData0[x] + xData2[x])
+ w = 1.2 * (xData1[x] + xData3[x])
# compute both the starting and ending (x, y)-coordinates for
# the text prediction bounding box
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
@@ 97,30 97,30 @@ class ComicScanner():
box = (startX, startY, endX, endY)
matched = False
for i in range(0, len(clusters)):
- if (rect_overlaps(box, clusters[i])):
+ if rect_overlaps(box, clusters[i]):
matched = True
clusters[i] = (
min(box[0], clusters[i][0]),
min(box[1], clusters[i][1]),
max(box[2], clusters[i][2]),
- max(box[3], clusters[i][3])
+ max(box[3], clusters[i][3]),
)
if not matched:
clusters.append(box)
# Remove duplicate clusters
- for _ in range(0,1):
+ for _ in range(0, 1):
finalClusters = []
for cluster in clusters:
matched = False
for i in range(0, len(finalClusters)):
- if (rect_overlaps(cluster, finalClusters[i])):
+ if rect_overlaps(cluster, finalClusters[i]):
matched = True
finalClusters[i] = (
min(cluster[0], finalClusters[i][0]),
min(cluster[1], finalClusters[i][1]),
max(cluster[2], finalClusters[i][2]),
- max(cluster[3], finalClusters[i][3])
+ max(cluster[3], finalClusters[i][3]),
)
if not matched:
finalClusters.append(cluster)
@@ 130,23 130,42 @@ class ComicScanner():
suppressedClusters = non_max_suppression(np.array(finalClusters))
for (startX, startY, endX, endY) in suppressedClusters:
- cropStartX = int(max(startX-10, 0))
- cropStartY = int(max(startY-10, 0))
- cropEndX = int(min(endX+10,W*rW))
- cropEndY = int(min(endY+10,H*rH))
+ cropStartX = int(max(startX - 10, 0))
+ cropStartY = int(max(startY - 10, 0))
+ cropEndX = int(min(endX + 10, W * rW))
+ cropEndY = int(min(endY + 10, H * rH))
cropped = orig[cropStartY:cropEndY, cropStartX:cropEndX]
- text = str(TextBlob("".join([c if ord(c) < 128 else "" for c in pytesseract.image_to_string(cropped, lang="eng", config=TESSCONFIG)]).strip().replace("[", "I").replace("\\", "l").replace("/","i").lower()).correct())
+ text = str(
+ TextBlob(
+ "".join(
+ [
+ c if ord(c) < 128 else ""
+ for c in pytesseract.image_to_string(
+ cropped, lang="eng", config=TESSCONFIG
+ )
+ ]
+ )
+ .strip()
+ .replace("[", "I")
+ .replace("\\", "l")
+ .replace("/", "i")
+ .lower()
+ ).correct()
+ )
yield text
-def rect_overlaps(r1,r2):
+
+def rect_overlaps(r1, r2):
poly1 = Polygon([[r1[0], r1[1]], [r1[0], r1[3]], [r1[2], r1[1]], [r1[2], r1[3]]])
poly2 = Polygon([[r2[0], r2[1]], [r2[0], r2[3]], [r2[2], r2[1]], [r2[2], r2[3]]])
return poly1.distance(poly2) < 5
+
def main():
- parser = argparse.ArgumentParser(description='Comic OCR processor')
- parser.add_argument('paths', metavar='path', type=str, nargs='+',
- help='paths to images to scan')
+ parser = argparse.ArgumentParser(description="Comic OCR processor")
+ parser.add_argument(
+ "paths", metavar="path", type=str, nargs="+", help="paths to images to scan"
+ )
args = parser.parse_args()
@@ 156,8 175,9 @@ def main():
except:
print("Path does not exist, or is not a valid image")
break
- for text in scan_image(img):
+ for text in ComicScanner.scan_image(img):
print(text)
+
if __name__ == "__main__":
main()