Add strict confidence filtering option

### New Feature:
- --strict flag for exact confidence matching
- Default: `-c Medium` matches Medium and High (the given level or better)
- Strict: `-c Medium --strict` matches Medium only (exact match)

### Usage:
./seo category_apply -s mistergeek.net -c Medium      # Medium or better
./seo category_apply -s mistergeek.net -c Medium --strict  # Medium only

### Example Output:
# Default mode (given level or better):
Filtered to 328 proposals (confidence >= Medium)

# Strict mode:
Filtered to 156 proposals (confidence = Medium, strict mode)

### Benefits:
- More precise control over which posts to update
- Each confidence level can be processed separately
- Better for batch processing in stages

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 17:13:06 +01:00
parent b265125656
commit 54168a1c00
3 changed files with 34 additions and 11 deletions

View File

@@ -334,6 +334,7 @@ class CategoryAssignmentProcessor:
def process_proposals(self, proposals: List[Dict], site_name: str,
confidence_threshold: str = 'Medium',
strict: bool = False,
dry_run: bool = False) -> Dict[str, int]:
"""
Process AI category proposals and apply to WordPress.
@@ -342,6 +343,7 @@ class CategoryAssignmentProcessor:
proposals: List of proposal dicts from CSV
site_name: Site to apply changes to (filters proposals)
confidence_threshold: Minimum confidence to apply (High, Medium, Low)
strict: If True, only match exact confidence level
dry_run: If True, don't actually make changes
Returns:
@@ -362,15 +364,23 @@ class CategoryAssignmentProcessor:
logger.info(f"Filtered to {len(proposals)} posts on {site_name} ({filtered_by_site} excluded from other sites)")
# Filter by confidence
confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
min_confidence = confidence_order.get(confidence_threshold, 2)
filtered_proposals = [
p for p in proposals
if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
]
logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
if strict:
# Exact match only
filtered_proposals = [
p for p in proposals
if p.get('category_confidence', 'Medium') == confidence_threshold
]
logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence = {confidence_threshold}, strict mode)")
else:
# Medium or better (default behavior)
confidence_order = {'High': 3, 'Medium': 2, 'Low': 1}
min_confidence = confidence_order.get(confidence_threshold, 2)
filtered_proposals = [
p for p in proposals
if confidence_order.get(p.get('category_confidence', 'Medium'), 2) >= min_confidence
]
logger.info(f"Filtered to {len(filtered_proposals)} proposals (confidence >= {confidence_threshold})")
# Fetch existing categories
self.category_manager.fetch_categories(site_name)
@@ -434,6 +444,7 @@ class CategoryAssignmentProcessor:
def run(self, proposals_csv: str, site_name: str,
confidence_threshold: str = 'Medium',
strict: bool = False,
dry_run: bool = False) -> Dict[str, int]:
"""
Run complete category assignment process.
@@ -442,6 +453,7 @@ class CategoryAssignmentProcessor:
proposals_csv: Path to proposals CSV
site_name: Site to apply changes to
confidence_threshold: Minimum confidence to apply
strict: If True, only match exact confidence level
dry_run: If True, preview changes without applying
Returns:
@@ -457,5 +469,6 @@ class CategoryAssignmentProcessor:
proposals,
site_name,
confidence_threshold,
dry_run
strict=strict,
dry_run=dry_run
)