Add strict confidence filtering option

### New Feature:
- --strict flag for exact confidence matching
- Default: Medium = Medium + High (or better)
- Strict: Medium = Medium only (exact match)

### Usage:
./seo category_apply -s mistergeek.net -c Medium           # Medium or better
./seo category_apply -s mistergeek.net -c Medium --strict  # Medium only

### Example Output:
# Default (or better):
Filtered to 328 proposals (confidence >= Medium)

# Strict mode:
Filtered to 156 proposals (confidence = Medium, strict mode)

### Benefits:
- More precise control over which posts to update
- Can process confidence levels separately
- Better for batch processing in stages

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-02-16 17:13:06 +01:00
parent b265125656
commit 54168a1c00
3 changed files with 34 additions and 11 deletions
--- a/src/seo/category_manager.py
+++ b/src/seo/category_manager.py
@@ -334,6 +334,7 @@ class CategoryAssignmentProcessor:

    def process_proposals(self, proposals: List[Dict], site_name: str,
                         confidence_threshold: str = 'Medium',
+                         strict: bool = False,
                         dry_run: bool = False) -> Dict[str, int]:
        """
        Process AI category proposals and apply to WordPress.
@@ -342,6 +343,7 @@ class CategoryAssignmentProcessor:
            proposals: List of proposal dicts from CSV
            site_name: Site to apply changes to (filters proposals)
            confidence_threshold: Minimum confidence to apply (High, Medium, Low)
+            strict: If True, only match exact confidence level
            dry_run: If True, don't actually make changes
            
        Returns:
@@ -362,15 +364,23 @@ class CategoryAssignmentProcessor:
        logger.info(f"Filtered to {len(proposals)} posts on {site_name} ({filtered_by_site} excluded from other sites)")
        
        # Filter by confidence
-        confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
-        min_confidence = confidence_order.get(confidence_threshold, 2)
-        
-        filtered_proposals = [
-            p for p in proposals
-            if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
-        ]
-        
-        logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
+        if strict:
+            # Exact match only
+            filtered_proposals = [
+                p for p in proposals
+                if p.get('category_confidence', 'Medium') == confidence_threshold
+            ]
+            logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence = {confidence_threshold}, strict mode)")
+        else:
+            # Medium or better (default behavior)
+            confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
+            min_confidence = confidence_order.get(confidence_threshold, 2)
+            
+            filtered_proposals = [
+                p for p in proposals
+                if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
+            ]
+            logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
        
        # Fetch existing categories
        self.category_manager.fetch_categories(site_name)
@@ -434,6 +444,7 @@ class CategoryAssignmentProcessor:

    def run(self, proposals_csv: str, site_name: str,
            confidence_threshold: str = 'Medium',
+            strict: bool = False,
            dry_run: bool = False) -> Dict[str, int]:
        """
        Run complete category assignment process.
@@ -442,6 +453,7 @@ class CategoryAssignmentProcessor:
            proposals_csv: Path to proposals CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply
+            strict: If True, only match exact confidence level
            dry_run: If True, preview changes without applying
            
        Returns:
@@ -457,5 +469,6 @@ class CategoryAssignmentProcessor:
            proposals, 
            site_name, 
            confidence_threshold,
-            dry_run
+            strict=strict,
+            dry_run=dry_run
        )