Add strict confidence filtering option

### New Feature:
- --strict flag for exact confidence matching
- Default: Medium = Medium + High (or better)
- Strict: Medium = Medium only (exact match)

### Usage:
./seo category_apply -s mistergeek.net -c Medium # Medium or better
./seo category_apply -s mistergeek.net -c Medium --strict # Medium only

### Example Output:
# Default (or better):
Filtered to 328 proposals (confidence >= Medium)
# Strict mode:
Filtered to 156 proposals (confidence = Medium, strict mode)

### Benefits:
- More precise control over which posts to update
- Can process confidence levels separately
- Better for batch processing in stages

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-02-16 17:13:06 +01:00
parent b265125656
commit 54168a1c00
3 changed files with 34 additions and 11 deletions
--- a/src/seo/app.py
+++ b/src/seo/app.py
@@ -92,7 +92,8 @@ class SEOApp:
        return proposer.run(output_file=output)
    
    def category_apply(self, proposals_csv: str, site_name: str,
-                      confidence: str = 'Medium', dry_run: bool = False) -> dict:
+                      confidence: str = 'Medium', strict: bool = False,
+                      dry_run: bool = False) -> dict:
        """
        Apply AI category proposals to WordPress.
        
@@ -100,6 +101,7 @@ class SEOApp:
            proposals_csv: Path to proposals CSV
            site_name: Site to apply changes to (mistergeek.net, webscroll.fr, hellogeek.net)
            confidence: Minimum confidence level (High, Medium, Low)
+            strict: If True, only match exact confidence (not "or better")
            dry_run: If True, preview changes without applying
            
        Returns:
@@ -112,6 +114,7 @@ class SEOApp:
            proposals_csv=proposals_csv,
            site_name=site_name,
            confidence_threshold=confidence,
+            strict=strict,
            dry_run=dry_run
        )
        
--- a/src/seo/category_manager.py
+++ b/src/seo/category_manager.py
@@ -334,6 +334,7 @@ class CategoryAssignmentProcessor:

    def process_proposals(self, proposals: List[Dict], site_name: str,
                         confidence_threshold: str = 'Medium',
+                         strict: bool = False,
                         dry_run: bool = False) -> Dict[str, int]:
        """
        Process AI category proposals and apply to WordPress.
@@ -342,6 +343,7 @@ class CategoryAssignmentProcessor:
            proposals: List of proposal dicts from CSV
            site_name: Site to apply changes to (filters proposals)
            confidence_threshold: Minimum confidence to apply (High, Medium, Low)
+            strict: If True, only match exact confidence level
            dry_run: If True, don't actually make changes
            
        Returns:
@@ -362,15 +364,23 @@ class CategoryAssignmentProcessor:
        logger.info(f"Filtered to {len(proposals)} posts on {site_name} ({filtered_by_site} excluded from other sites)")
        
        # Filter by confidence
-        confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
-        min_confidence = confidence_order.get(confidence_threshold, 2)
-        
-        filtered_proposals = [
-            p for p in proposals
-            if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
-        ]
-        
-        logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
+        if strict:
+            # Exact match only
+            filtered_proposals = [
+                p for p in proposals
+                if p.get('category_confidence', 'Medium') == confidence_threshold
+            ]
+            logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence = {confidence_threshold}, strict mode)")
+        else:
+            # Medium or better (default behavior)
+            confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
+            min_confidence = confidence_order.get(confidence_threshold, 2)
+            
+            filtered_proposals = [
+                p for p in proposals
+                if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
+            ]
+            logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
        
        # Fetch existing categories
        self.category_manager.fetch_categories(site_name)
@@ -434,6 +444,7 @@ class CategoryAssignmentProcessor:

    def run(self, proposals_csv: str, site_name: str,
            confidence_threshold: str = 'Medium',
+            strict: bool = False,
            dry_run: bool = False) -> Dict[str, int]:
        """
        Run complete category assignment process.
@@ -442,6 +453,7 @@ class CategoryAssignmentProcessor:
            proposals_csv: Path to proposals CSV
            site_name: Site to apply changes to
            confidence_threshold: Minimum confidence to apply
+            strict: If True, only match exact confidence level
            dry_run: If True, preview changes without applying
            
        Returns:
@@ -457,5 +469,6 @@ class CategoryAssignmentProcessor:
            proposals, 
            site_name, 
            confidence_threshold,
-            dry_run
+            strict=strict,
+            dry_run=dry_run
        )
--- a/src/seo/cli.py
+++ b/src/seo/cli.py
@@ -47,6 +47,7 @@ Examples:
    parser.add_argument('--site', '-s', choices=['mistergeek.net', 'webscroll.fr', 'hellogeek.net'],
                       help='WordPress site for category operations')
    parser.add_argument('--description', '-d', help='Category description')
+    parser.add_argument('--strict', action='store_true', help='Strict confidence matching (exact match only)')

    args = parser.parse_args()

@@ -160,6 +161,8 @@ def cmd_category_apply(app, args):
        print("Would apply category proposals to WordPress")
        print(f"  Site: {args.site}")
        print(f"  Confidence: {args.confidence}")
+        if args.strict:
+            print(f"  Strict mode: Yes (exact match only)")
        return 0
    
    if not args.site:
@@ -180,11 +183,14 @@ def cmd_category_apply(app, args):
    print(f"Applying categories from: {proposals_csv}")
    print(f"Site: {args.site}")
    print(f"Confidence threshold: {args.confidence}")
+    if args.strict:
+        print(f"Strict mode: Yes (exact match only)")
    
    stats = app.category_apply(
        proposals_csv=proposals_csv,
        site_name=args.site,
        confidence=args.confidence,
+        strict=args.strict,
        dry_run=False
    )
    
@@ -307,6 +313,7 @@ Options:
  --confidence, -c          Confidence threshold: High, Medium, Low
  --site, -s                WordPress site: mistergeek.net, webscroll.fr, hellogeek.net
  --description, -d         Category description
+  --strict                  Strict confidence matching (exact match only, not "or better")

 Examples:
  seo export