Files
seo/check_confidence.py
Kevin Bataille 06d660f9c8 Add confidence breakdown display
- Shows High/Medium/Low count breakdown
- Helps verify all matching posts will be processed
- Example output:
  Filtered to 328 proposals (confidence >= Medium)
    Breakdown: High=293, Medium=35, Low=0

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-02-16 18:21:16 +01:00

35 lines
1.3 KiB
Python

#!/usr/bin/env python3
import csv
from collections import Counter
import glob
# Glob pattern for the proposal CSVs; the lexicographically last match is used.
PROPOSALS_PATTERN = 'output/category_proposals_*.csv'

# Sites listed in the per-site section of the report, in display order.
SITES = ('mistergeek.net', 'webscroll.fr', 'hellogeek.net')

# Confidence values counted on the "-c Medium (default)" line.
MEDIUM_OR_BETTER = ('High', 'Medium')


def load_proposals(path):
    """Return every row of the CSV file at *path* as a list of dicts.

    Rows are keyed by the file's header line (``csv.DictReader``).
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain newlines are parsed correctly.
    # NOTE(review): encoding is left at the platform default, as before --
    # confirm what the producer of these files writes and pin it here.
    with open(path, 'r', newline='') as f:
        return list(csv.DictReader(f))


def report_lines(proposals, sites=SITES):
    """Build the confidence report as a list of ``print`` arguments.

    Args:
        proposals: list of mappings, each with ``current_site`` and
            ``category_confidence`` keys.
        sites: site names shown in the per-site section, in order. A site
            with no proposals is still listed, with a total of 0.

    Returns:
        list of str. Printing each item in order produces the report; some
        items carry an embedded ``\\n`` to create blank separator lines.

    Raises:
        KeyError: if a row lacks ``current_site`` or ``category_confidence``.
    """
    lines = [
        "=== All Proposals ===",
        f"Total: {len(proposals)}\n",
        "By Site:",
    ]
    by_site = Counter(p['current_site'] for p in proposals)
    lines.extend(f" {site}: {count}" for site, count in sorted(by_site.items()))

    lines.append("\nBy Confidence (all sites):")
    by_conf = Counter(p['category_confidence'] for p in proposals)
    lines.extend(f" {conf}: {count}" for conf, count in sorted(by_conf.items()))

    lines.append("\nBy Site and Confidence:")
    for site in sites:
        site_props = [p for p in proposals if p['current_site'] == site]
        site_conf = Counter(p['category_confidence'] for p in site_props)
        lines.append(f"\n {site} ({len(site_props)} total):")
        lines.extend(
            f" {conf}: {count}" for conf, count in sorted(site_conf.items())
        )
        # Counter returns 0 for a missing key, so absent levels add nothing.
        processed = sum(site_conf[level] for level in MEDIUM_OR_BETTER)
        lines.append(f" → Would process with -c Medium (default): {processed}")
    return lines


files = sorted(glob.glob(PROPOSALS_PATTERN))
if files:
    # Sorted ascending, so the last entry is the one reported on.
    for line in report_lines(load_proposals(files[-1])):
        print(line)
else:
    # Say so explicitly instead of exiting with no output at all.
    print(f"No proposal files found matching {PROPOSALS_PATTERN}")