Spaces:
Paused
Paused
backup
Browse files
app.py
CHANGED
|
@@ -262,24 +262,29 @@ def get_statistics():
|
|
| 262 |
if not data_dir.exists():
|
| 263 |
return "No data directory found"
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
| 267 |
posts_with_responses = 0
|
| 268 |
total_responses = 0
|
| 269 |
responses_per_post = [] # List to track number of responses for each post
|
| 270 |
|
| 271 |
for metadata_file in data_dir.glob("*/metadata.json"):
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
# Calculate additional statistics
|
| 285 |
if responses_per_post:
|
|
@@ -295,17 +300,21 @@ def get_statistics():
|
|
| 295 |
stats = f"""
|
| 296 |
📊 Collection Statistics:
|
| 297 |
|
| 298 |
-
|
| 299 |
-
- Total Posts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
- Posts with Responses: {posts_with_responses}
|
|
|
|
| 301 |
- Total Individual Responses: {total_responses}
|
| 302 |
-
- Completion Rate: {(posts_with_responses/len(VALID_DATASET_POST_IDS)*100):.2f}%
|
| 303 |
|
| 304 |
Response Distribution:
|
| 305 |
- Median Responses per Post: {median_responses}
|
| 306 |
- Average Responses per Post: {avg_responses:.2f}
|
| 307 |
- Maximum Responses for a Post: {max_responses}
|
| 308 |
-
- Posts with No Responses: {total_posts - posts_with_responses}
|
| 309 |
"""
|
| 310 |
return stats
|
| 311 |
|
|
|
|
| 262 |
if not data_dir.exists():
|
| 263 |
return "No data directory found"
|
| 264 |
|
| 265 |
+
total_expected_posts = len(VALID_DATASET_POST_IDS)
|
| 266 |
+
processed_post_ids = set()
|
| 267 |
posts_with_responses = 0
|
| 268 |
total_responses = 0
|
| 269 |
responses_per_post = [] # List to track number of responses for each post
|
| 270 |
|
| 271 |
for metadata_file in data_dir.glob("*/metadata.json"):
|
| 272 |
+
post_id = metadata_file.parent.name
|
| 273 |
+
if post_id in VALID_DATASET_POST_IDS: # Only count valid posts
|
| 274 |
+
processed_post_ids.add(post_id)
|
| 275 |
+
try:
|
| 276 |
+
with open(metadata_file, "r") as f:
|
| 277 |
+
metadata = json.load(f)
|
| 278 |
+
num_responses = len(metadata.get("responses", []))
|
| 279 |
+
responses_per_post.append(num_responses)
|
| 280 |
+
if num_responses > 0:
|
| 281 |
+
posts_with_responses += 1
|
| 282 |
+
total_responses += num_responses
|
| 283 |
+
except:
|
| 284 |
+
continue
|
| 285 |
+
|
| 286 |
+
missing_posts = set(map(str, VALID_DATASET_POST_IDS)) - processed_post_ids
|
| 287 |
+
total_processed = len(processed_post_ids)
|
| 288 |
|
| 289 |
# Calculate additional statistics
|
| 290 |
if responses_per_post:
|
|
|
|
| 300 |
stats = f"""
|
| 301 |
📊 Collection Statistics:
|
| 302 |
|
| 303 |
+
Dataset Coverage:
|
| 304 |
+
- Total Expected Posts: {total_expected_posts}
|
| 305 |
+
- Posts Processed: {total_processed}
|
| 306 |
+
- Missing Posts: {len(missing_posts)} ({', '.join(list(missing_posts)[:5])}{'...' if len(missing_posts) > 5 else ''})
|
| 307 |
+
- Coverage Rate: {(total_processed/total_expected_posts*100):.2f}%
|
| 308 |
+
|
| 309 |
+
Response Statistics:
|
| 310 |
- Posts with Responses: {posts_with_responses}
|
| 311 |
+
- Posts without Responses: {total_processed - posts_with_responses}
|
| 312 |
- Total Individual Responses: {total_responses}
|
|
|
|
| 313 |
|
| 314 |
Response Distribution:
|
| 315 |
- Median Responses per Post: {median_responses}
|
| 316 |
- Average Responses per Post: {avg_responses:.2f}
|
| 317 |
- Maximum Responses for a Post: {max_responses}
|
|
|
|
| 318 |
"""
|
| 319 |
return stats
|
| 320 |
|