Add confidence breakdown display
- Shows High/Medium/Low count breakdown
- Helps verify all matching posts will be processed
- Example output:
Filtered to 328 proposals (confidence >= Medium)
Breakdown: High=293, Medium=35, Low=0
Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
34
check_confidence.py
Normal file
34
check_confidence.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import csv
|
||||||
|
from collections import Counter
|
||||||
|
import glob
|
||||||
|
|
||||||
|
# Glob pattern for the proposal CSV files; the lexicographically last match is used.
PROPOSALS_GLOB = 'output/category_proposals_*.csv'

# Sites that get their own per-confidence section in the report.
REPORTED_SITES = ['mistergeek.net', 'webscroll.fr', 'hellogeek.net']

# Label used when a row has no value for a column. csv.DictReader fills short
# rows with None, and None cannot be sorted together with str keys.
MISSING_LABEL = '(missing)'


def build_report(proposals):
    """Return the report lines for a list of proposal rows.

    Args:
        proposals: list of dict-like rows; the 'current_site' and
            'category_confidence' keys are read from each row.

    Returns:
        A list of strings, one per print() call of the report: overall
        total, counts per site, counts per confidence, and for each site in
        REPORTED_SITES its confidence breakdown plus the number of rows whose
        confidence is 'High' or 'Medium'.
    """
    def field(row, key):
        # Absent key or None value (short CSV row) -> sortable placeholder.
        value = row.get(key)
        return MISSING_LABEL if value is None else value

    lines = ["=== All Proposals ===", f"Total: {len(proposals)}\n"]

    lines.append("By Site:")
    sites = Counter(field(p, 'current_site') for p in proposals)
    lines.extend(f" {site}: {count}" for site, count in sorted(sites.items()))

    lines.append("\nBy Confidence (all sites):")
    confs = Counter(field(p, 'category_confidence') for p in proposals)
    lines.extend(f" {conf}: {count}" for conf, count in sorted(confs.items()))

    lines.append("\nBy Site and Confidence:")
    for site in REPORTED_SITES:
        site_props = [p for p in proposals if p.get('current_site') == site]
        site_confs = Counter(field(p, 'category_confidence') for p in site_props)
        lines.append(f"\n {site} ({len(site_props)} total):")
        lines.extend(
            f" {conf}: {count}" for conf, count in sorted(site_confs.items())
        )

        # Counter returns 0 for labels that never occurred.
        # NOTE(review): only exact 'High'/'Medium' labels are counted here;
        # confirm this matches how the processor treats blank/unknown values.
        medium_or_better = site_confs['High'] + site_confs['Medium']
        lines.append(
            f" → Would process with -c Medium (default): {medium_or_better}"
        )

    return lines


files = sorted(glob.glob(PROPOSALS_GLOB))
if files:
    # newline='' is what the csv module expects so quoted fields containing
    # line breaks are parsed correctly. Undecodable bytes are replaced rather
    # than aborting the report, since only the site and confidence label
    # columns are inspected.
    with open(files[-1], 'r', encoding='utf-8', errors='replace', newline='') as f:
        proposals = list(csv.DictReader(f))

    for line in build_report(proposals):
        print(line)
else:
    # Say so explicitly instead of exiting without any output.
    print(f"No proposal files found matching {PROPOSALS_GLOB}")
|
||||||
@@ -381,6 +381,12 @@ class CategoryAssignmentProcessor:
|
|||||||
if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
|
if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
|
||||||
]
|
]
|
||||||
logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
|
logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
|
||||||
|
|
||||||
|
# Show breakdown
|
||||||
|
high_count = sum(1 for p in filtered_proposals if p.get('category_confidence') == 'High')
|
||||||
|
medium_count = sum(1 for p in filtered_proposals if p.get('category_confidence') == 'Medium')
|
||||||
|
low_count = sum(1 for p in filtered_proposals if p.get('category_confidence') == 'Low')
|
||||||
|
logger.info(f" Breakdown: High={high_count}, Medium={medium_count}, Low={low_count}")
|
||||||
|
|
||||||
# Fetch existing categories
|
# Fetch existing categories
|
||||||
self.category_manager.fetch_categories(site_name)
|
self.category_manager.fetch_categories(site_name)
|
||||||
|
|||||||
Reference in New Issue
Block a user