graphics/py-pytesseract: fix DICT conversion

...via the TSV file; this was the only failing self-test, and it now passes.

https://github.com/madmaze/pytesseract/issues/406
This commit is contained in:
Matthias Andree
2022-01-28 00:48:12 +01:00
parent 79959846f0
commit fb684eef54
2 changed files with 40 additions and 1 deletions

View File

@@ -1,5 +1,6 @@
PORTNAME= pytesseract
PORTVERSION= 0.3.9
PORTREVISION= 1
DISTVERSIONPREFIX= v
CATEGORIES= graphics python
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
@@ -7,6 +8,8 @@ PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
MAINTAINER= mandree@FreeBSD.org
COMMENT= wrapper for Google's Tesseract OCR engine
PATCH_STRIP= -p1
LICENSE= BSD2CLAUSE
LICENSE_FILE= ${WRKSRC}/LICENSE
@@ -25,7 +28,7 @@ USE_PYTHON= autoplist concurrent distutils
NO_ARCH= yes
do-test:
cd ${WRKSRC} && ${SETENV} ${TEST_ENV} tox -e ${PY_FLAVOR} --sitepackages
cd ${WRKSRC} && ${SETENV} ${TEST_ENV} tox-${PYTHON_VER} -e ${PY_FLAVOR} --sitepackages
.include <bsd.port.pre.mk>

View File

@@ -0,0 +1,36 @@
This patch is obtained from upstream; ADDITIONALLY, it changes upstream's
(int3l@github) `val = int(row[i])` inside the try: block to `val = int(float(row[i]))`
so that numeric strings with a fractional part also convert. -- Matthias Andree, mandree@FreeBSD.org
From 06e7f8077467950d2f4e0f619fb193730c2d2079 Mon Sep 17 00:00:00 2001
From: int3l <int3l@users.noreply.github.com>
Date: Thu, 27 Jan 2022 16:09:21 +0200
Subject: [PATCH] Fix confidence conversion from str to int
Account for negative values. Fixes #406
---
pytesseract/pytesseract.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/pytesseract/pytesseract.py b/pytesseract/pytesseract.py
index 984b106..e927e80 100644
--- a/pytesseract/pytesseract.py
+++ b/pytesseract/pytesseract.py
@@ -313,9 +313,14 @@ def file_to_dict(tsv, cell_delimiter, str_col_idx):
if len(row) <= i:
continue
- val = row[i]
- if row[i].isdigit() and i != str_col_idx:
- val = int(row[i])
+ if i != str_col_idx:
+ try:
+ val = int(float(row[i]))
+ except ValueError:
+ val = row[i]
+ else:
+ val = row[i]
+
result[head].append(val)
return result