I want to extract the text from this image. To do so, I tried removing the rectangular box contours by detecting the horizontal and vertical lines that form the boxes. However, I ran into a problem: some character pixels were mistakenly identified as vertical lines. My goal is to obtain a clean image without the rectangular boxes, containing only the lines of text, so that I can then apply pytesseract for text extraction.
Could you please suggest a way to remove the rectangular boxes?
Thanks!
import cv2
from PIL import Image
import matplotlib.pylab as plt
# Load the input image. cv2.imread returns None instead of raising when the
# file cannot be read, so fail loudly here rather than deeper in the pipeline.
image = cv2.imread("sample.png")
if image is None:
    raise FileNotFoundError("could not read image: sample.png")
# OpenCV loads pixels in BGR order; convert once to RGB so that matplotlib
# (which expects RGB) displays the colours correctly.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Lines are erased on a copy so the loaded image stays untouched.
result = image.copy()

# Binarise with Otsu's automatic threshold. THRESH_BINARY_INV maps the darker
# pixels to 255 (assuming dark ink on a light background, this makes text and
# box lines the foreground).
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove horizontal lines
# Opening with a 40x1 (width x height) kernel keeps only foreground runs that
# are long in the horizontal direction.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
remove_horizontal = cv2.morphologyEx(
    thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
)
cnts = cv2.findContours(
    remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
# findContours returns a 2-tuple or a 3-tuple depending on the OpenCV version;
# pick the element that holds the contour list in either case.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Paint over each detected line in white, 5 px thick, to erase it.
    cv2.drawContours(result, [c], -1, (255, 255, 255), 5)
plt.imshow(result)
[Result after removing horizontal lines](https://i.stack.imgur.com/Jpte7.png)
# Remove vertical lines
# Works on `thresh` (the binarised image) and erases lines in `result`, both
# computed earlier in the script.
# Opening with a 1x40 (width x height) kernel keeps only foreground runs that
# are long in the vertical direction.
# NOTE(review): tall character strokes can survive this opening and be erased
# as if they were box lines; the kernel height presumably needs to exceed the
# tallest character stroke in the image -- tune against the real input.
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
remove_vertical = cv2.morphologyEx(
    thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2
)
cnts = cv2.findContours(
    remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
# findContours returns a 2-tuple or a 3-tuple depending on the OpenCV version;
# pick the element that holds the contour list in either case.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Paint over each detected line in white, 5 px thick, to erase it.
    cv2.drawContours(result, [c], -1, (255, 255, 255), 5)
plt.imshow(result)
[Result after removing horizontal and vertical lines](https://i.stack.imgur.com/MXjMJ.png)