Files changed (1) hide show
  1. utils.py +102 -0
utils.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import html
3
+ import logging
4
+ import json
5
+ from PIL import Image
6
+
7
+
8
def truncate_last_incomplete_element(text: str):
    """Cut off the final ``{"bbox":`` element of a JSON-like list string.

    Truncation is attempted when *text* is longer than 50000 characters or,
    ignoring surrounding whitespace, does not end with ``]``. In that case
    everything from the last ``{"bbox":`` onward is dropped, a trailing comma
    is removed and a closing ``]`` is appended.

    Args:
        text: Serialized list of ``{"bbox": ...}`` objects, possibly cut short.

    Returns:
        A ``(text, truncated)`` tuple. ``truncated`` is ``True`` only when an
        element was actually removed; otherwise the input is returned as is.
    """
    marker = '{"bbox":'

    # Short text that already closes its list is left alone.
    if len(text) <= 50000 and text.strip().endswith("]"):
        return text, False

    # With zero or one object, truncating would throw away all content.
    if text.count(marker) <= 1:
        return text, False

    cut_at = text.rfind(marker)
    if cut_at <= 0:
        return text, False

    # Keep everything before the last object, then repair the list ending.
    head = text[:cut_at].rstrip()
    if head.endswith(","):
        head = head[:-1]
    return head + "]", True
38
+
39
+
40
def obtain_origin_hw(image_paths: list[str]):
    """Return ``(height, width)`` of the first image in *image_paths*.

    Only ``image_paths[0]`` is inspected. Any failure (empty list, missing or
    unreadable file) is reported on stdout and answered with the fallback
    size ``(1000, 1000)`` instead of raising.

    Args:
        image_paths: Paths of images; only the first entry is opened.

    Returns:
        A ``(origin_height, origin_width)`` tuple, or ``(1000, 1000)`` when
        the size could not be determined.
    """
    if not image_paths:
        # Previously an empty list raised IndexError a second time inside the
        # except handler (which formatted image_paths[0]), so the fallback was
        # never reached.
        print("obtain_origin_hw: image_paths is empty, using fallback size")
        return 1000, 1000

    first_path = image_paths[0]
    try:
        # The context manager releases the file handle opened by Image.open.
        with Image.open(first_path) as image:
            # NOTE(review): convert("RGB") forces a full decode just to read
            # the size; it is kept so undecodable files still take the
            # fallback path exactly as before.
            origin_width, origin_height = image.convert("RGB").size
        return origin_height, origin_width
    except Exception as e:
        print(f"处理图像 {first_path} 时出错: {e}")
        return 1000, 1000
48
+
49
+
50
def restore_abs_bbox_coordinates(ans: str, origin_height: float, origin_width: float):
    """Rescale bbox values in a JSON answer from a 0-1000 grid to pixel units.

    The answer is passed through ``extract_json_content`` and
    ``truncate_last_incomplete_element`` and then parsed as JSON, with a
    Python-literal parse as fallback. For every dict element of the parsed
    list, each value stored under a key containing ``"bbox"`` that is a
    sequence of four numbers ``[x1, y1, x2, y2]`` is replaced by
    ``int(v / 1000.0 * origin_width)`` for x values and
    ``int(v / 1000.0 * origin_height)`` for y values. Values that do not have
    that shape are left untouched and logged.

    Args:
        ans: Raw answer text expected to contain a JSON list of elements.
        origin_height: Height of the original image.
        origin_width: Width of the original image.

    Returns:
        The rescaled data serialized with ``json.dumps(..., indent=4)``, or the
        cleaned-up answer text unchanged when it cannot be parsed.
    """
    import ast  # local import: only needed by the literal fallback below

    # NOTE(review): extract_json_content is not defined in the visible part of
    # this module; it has to be provided elsewhere -- confirm.
    ans = extract_json_content(ans)
    ans, _ = truncate_last_incomplete_element(ans)

    try:
        data = json.loads(ans)
    except Exception:
        # SECURITY: the fallback used to be eval(ans), which executes any code
        # embedded in the answer text. ast.literal_eval accepts the Python
        # literal spellings the fallback exists for (single quotes, True/None,
        # trailing commas) without running code.
        try:
            data = ast.literal_eval(ans)
        except Exception as e:
            print(f"解析json时出错: {e}")
            return ans

    # The original logged through an undefined name (eval_logger), turning any
    # malformed bbox into a NameError.
    logger = logging.getLogger(__name__)

    if isinstance(data, list):
        for index, item in enumerate(data):
            if not isinstance(item, dict):
                continue
            # Only values of existing keys are reassigned, so iterating the
            # dict while updating it is safe.
            for key, value in item.items():
                if not (isinstance(key, str) and "bbox" in key):
                    continue
                is_box = (
                    isinstance(value, (list, tuple))
                    and len(value) == 4
                    and all(isinstance(coord, (int, float)) for coord in value)
                )
                if not is_box:
                    logger.info("ERROR CHECK: idx %s, %s", index, data)
                    continue
                x1, y1, x2, y2 = value
                item[key] = [
                    int(x1 / 1000.0 * origin_width),
                    int(y1 / 1000.0 * origin_height),
                    int(x2 / 1000.0 * origin_width),
                    int(y2 / 1000.0 * origin_height),
                ]

    return json.dumps(data, indent=4)
83
+
84
+
85
def convert_json_to_markdown(ans: str, keep_header_footer: bool = False):
    """Join the ``"text"`` fields of a JSON answer into one markdown string.

    The answer is passed through ``extract_json_content`` and
    ``truncate_last_incomplete_element``, parsed with ``json.loads`` and the
    non-empty ``"text"`` values of its dict elements are joined with blank
    lines. Unless *keep_header_footer* is true, elements whose ``"category"``
    is ``"header"``, ``"footer"`` or ``"page_footnote"`` are left out.

    Args:
        ans: Raw answer text expected to contain a JSON list of elements.
        keep_header_footer: Keep header/footer/page-footnote elements too.

    Returns:
        The joined text, or the cleaned-up answer text when nothing could be
        collected or any error occurred (the error is printed).
    """
    # NOTE(review): extract_json_content is not defined in the visible part of
    # this module; it has to be provided elsewhere -- confirm.
    ans = extract_json_content(ans)
    ans, _ = truncate_last_incomplete_element(ans)

    excluded = () if keep_header_footer else ("header", "footer", "page_footnote")
    try:
        elements = json.loads(ans)
        # .get("category") keeps elements without a category; indexing it
        # directly raised KeyError and discarded the whole conversion.
        res_str = [
            item["text"]
            for item in elements
            if isinstance(item, dict)
            and item.get("text")
            and item.get("category") not in excluded
        ]
        return "\n\n".join(res_str) if res_str else ans
    except Exception as e:
        # Best effort: fall back to the unconverted text on any failure.
        print(f"process ans error: {e}")
        return ans