Fix category_apply - Filter by site and show article titles

- Filter proposals by current_site so that changes are applied only to posts on the selected site
- Show article title for each post
- Show current and proposed categories
- Better error logging (log an explicit error when a category assignment fails)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 16:36:51 +01:00
parent fa700cba98
commit b265125656

View File

@@ -340,7 +340,7 @@ class CategoryAssignmentProcessor:
Args: Args:
proposals: List of proposal dicts from CSV proposals: List of proposal dicts from CSV
site_name: Site to apply changes to site_name: Site to apply changes to (filters proposals)
confidence_threshold: Minimum confidence to apply (High, Medium, Low) confidence_threshold: Minimum confidence to apply (High, Medium, Low)
dry_run: If True, don't actually make changes dry_run: If True, don't actually make changes
@@ -354,6 +354,13 @@ class CategoryAssignmentProcessor:
if dry_run: if dry_run:
logger.info("DRY RUN - No changes will be made") logger.info("DRY RUN - No changes will be made")
# Filter by site
original_count = len(proposals)
proposals = [p for p in proposals if p.get('current_site', '') == site_name]
filtered_by_site = original_count - len(proposals)
logger.info(f"Filtered to {len(proposals)} posts on {site_name} ({filtered_by_site} excluded from other sites)")
# Filter by confidence # Filter by confidence
confidence_order = {'High': 3, 'Medium': 2, 'Low': 1} confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
min_confidence = confidence_order.get(confidence_threshold, 2) min_confidence = confidence_order.get(confidence_threshold, 2)
@@ -370,20 +377,23 @@ class CategoryAssignmentProcessor:
# Process each proposal # Process each proposal
for i, proposal in enumerate(filtered_proposals, 1): for i, proposal in enumerate(filtered_proposals, 1):
logger.info(f"\n[{i}/{len(filtered_proposals)}] Processing post {proposal.get('post_id')}...") post_title = proposal.get('title', 'Unknown')[:60]
post_id = proposal.get('post_id', '')
post_id = int(proposal.get('post_id', 0))
proposed_category = proposal.get('proposed_category', '') proposed_category = proposal.get('proposed_category', '')
current_categories = proposal.get('current_categories', '') current_categories = proposal.get('current_categories', '')
confidence = proposal.get('category_confidence', 'Medium') confidence = proposal.get('category_confidence', 'Medium')
logger.info(f"\n[{i}/{len(filtered_proposals)}] Post {post_id}: {post_title}...")
logger.info(f" Current categories: {current_categories}")
logger.info(f" Proposed: {proposed_category} (confidence: {confidence})")
if not post_id or not proposed_category: if not post_id or not proposed_category:
logger.warning(" Skipping: Missing post_id or proposed_category") logger.warning(" Skipping: Missing post_id or proposed_category")
self.processing_stats['errors'] += 1 self.processing_stats['errors'] += 1
continue continue
if dry_run: if dry_run:
logger.info(f" Would assign to: {proposed_category}") logger.info(f" [DRY RUN] Would assign to: {proposed_category}")
continue continue
# Get or create the category # Get or create the category
@@ -404,9 +414,10 @@ class CategoryAssignmentProcessor:
logger.info(f" ✓ Assigned to '{proposed_category}'") logger.info(f" ✓ Assigned to '{proposed_category}'")
else: else:
self.processing_stats['errors'] += 1 self.processing_stats['errors'] += 1
logger.error(f" ✗ Failed to assign category")
else: else:
self.processing_stats['errors'] += 1 self.processing_stats['errors'] += 1
logger.error(f" Failed to get/create category '{proposed_category}'") logger.error(f" Failed to get/create category '{proposed_category}'")
self.processing_stats['total_posts'] = len(filtered_proposals) self.processing_stats['total_posts'] = len(filtered_proposals)