Refactor into integrated Python package structure
Architecture Changes: - Created src/seo/ package with modular architecture - Main application class (SEOApp) with Rails-inspired API - Separated concerns into distinct modules: - app.py: Main application orchestrator - cli.py: Command-line interface - config.py: Configuration management - exporter.py: Post export functionality - analyzer.py: AI analysis - recategorizer.py: Recategorization - seo_checker.py: SEO quality checking - categories.py: Category management - approval.py: User approval system New Features: - Proper Python package structure (src layout) - setup.py and setup.cfg for installation - Can be installed with: pip install -e . - Entry point: seo = seo.cli:main - Cleaner imports and dependencies Benefits: - Better code organization - Easier to maintain and extend - Follows Python best practices - Proper package isolation - Can be imported as library - Testable components - Clear separation of concerns Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
36
README.md
36
README.md
@@ -193,23 +193,39 @@ The SEO automation tool provides a simple, intuitive CLI inspired by Ruby on Rai
|
|||||||
|
|
||||||
```
|
```
|
||||||
seo/
|
seo/
|
||||||
├── seo # Main CLI application (executable)
|
├── seo # Main CLI executable
|
||||||
├── scripts/
|
├── src/seo/ # Integrated application package
|
||||||
│ ├── config.py # Configuration loader
|
│ ├── __init__.py # Package initialization
|
||||||
│ ├── export_posts_for_ai_decision.py
|
│ ├── cli.py # Command-line interface
|
||||||
│ ├── ai_analyze_posts_for_decisions.py
|
│ ├── app.py # Main application class
|
||||||
│ ├── multi_site_seo_analyzer.py
|
│ ├── config.py # Configuration management
|
||||||
│ ├── category_manager.py # Category management
|
│ ├── exporter.py # Post export functionality
|
||||||
│ ├── user_approval.py # Approval system
|
│ ├── analyzer.py # AI analysis functionality
|
||||||
│ └── ...
|
│ ├── recategorizer.py # Recategorization functionality
|
||||||
|
│ ├── seo_checker.py # SEO quality checking
|
||||||
|
│ ├── categories.py # Category management
|
||||||
|
│ └── approval.py # User approval system
|
||||||
|
├── scripts/ # Legacy scripts (deprecated)
|
||||||
├── config.yaml # YAML configuration
|
├── config.yaml # YAML configuration
|
||||||
├── .env # Environment variables
|
├── .env # Environment variables
|
||||||
├── .env.example # Template
|
├── .env.example # Template
|
||||||
├── requirements.txt # Dependencies
|
├── requirements.txt # Dependencies
|
||||||
├── output/reports/ # Generated CSV files
|
├── output/ # Generated files
|
||||||
└── README.md
|
└── README.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 📦 Installation
|
||||||
|
|
||||||
|
For development/installation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install in development mode
|
||||||
|
pip install -e .
|
||||||
|
|
||||||
|
# Or just use the executable directly
|
||||||
|
./seo help
|
||||||
|
```
|
||||||
|
|
||||||
## 🎯 Typical Workflow
|
## 🎯 Typical Workflow
|
||||||
|
|
||||||
1. **Export posts** from all sites:
|
1. **Export posts** from all sites:
|
||||||
|
|||||||
491
seo
491
seo
@@ -1,493 +1,18 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
SEO Automation CLI - Inspired by Ruby on Rails CLI
|
SEO Automation CLI - Main executable
|
||||||
Simple, intuitive commands for managing WordPress SEO
|
Entry point for the SEO automation tool.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Add scripts directory to path
|
# Add src to path
|
||||||
SCRIPTS_DIR = Path(__file__).parent / 'scripts'
|
src_dir = Path(__file__).parent / 'src'
|
||||||
sys.path.insert(0, str(SCRIPTS_DIR))
|
sys.path.insert(0, str(src_dir))
|
||||||
|
|
||||||
from config import Config
|
|
||||||
from export_posts_for_ai_decision import PostExporter
|
|
||||||
from ai_analyze_posts_for_decisions import PostAnalyzer
|
|
||||||
from ai_recategorize_posts import PostRecategorizer
|
|
||||||
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
|
|
||||||
from analytics_importer import AnalyticsImporter
|
|
||||||
from content_gap_analyzer import ContentGapAnalyzer
|
|
||||||
from opportunity_analyzer import OpportunityAnalyzer
|
|
||||||
from report_generator import ReportGenerator
|
|
||||||
from category_manager import CategoryManager
|
|
||||||
from user_approval import UserApprovalSystem
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main CLI entry point"""
|
|
||||||
parser = argparse.ArgumentParser(
|
|
||||||
prog='seo',
|
|
||||||
description='SEO Automation CLI - Manage WordPress SEO with AI',
|
|
||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
||||||
epilog="""
|
|
||||||
Examples:
|
|
||||||
seo help # Show this help
|
|
||||||
seo export # Export all posts from WordPress sites
|
|
||||||
seo analyze # Analyze posts with AI for recommendations
|
|
||||||
seo recategorize # Recategorize posts with AI
|
|
||||||
seo seo_check # Check SEO quality of titles/descriptions
|
|
||||||
seo categories # Manage categories across sites
|
|
||||||
seo approve # Review and approve recommendations
|
|
||||||
seo full_pipeline # Run complete workflow: export → analyze → seo_check
|
|
||||||
seo analytics ga4.csv gsc.csv # Import analytics data
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument('command', nargs='?', help='Command to run')
|
|
||||||
parser.add_argument('args', nargs='*', help='Arguments for the command')
|
|
||||||
|
|
||||||
# Global options
|
|
||||||
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
|
|
||||||
parser.add_argument('--dry-run', action='store_true', help='Show what would be done without doing it')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
if not args.command:
|
|
||||||
parser.print_help()
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Validate configuration
|
|
||||||
try:
|
|
||||||
Config.validate()
|
|
||||||
except ValueError as e:
|
|
||||||
print(f"❌ Configuration error: {e}")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Route to appropriate command
|
|
||||||
command_map = {
|
|
||||||
'help': show_help,
|
|
||||||
'export': export_posts,
|
|
||||||
'analyze': analyze_posts,
|
|
||||||
'recategorize': recategorize_posts,
|
|
||||||
'seo_check': seo_check,
|
|
||||||
'categories': manage_categories,
|
|
||||||
'approve': approve_recommendations,
|
|
||||||
'full_pipeline': run_full_pipeline,
|
|
||||||
'analytics': import_analytics,
|
|
||||||
'gaps': analyze_content_gaps,
|
|
||||||
'opportunities': analyze_opportunities,
|
|
||||||
'report': generate_report,
|
|
||||||
'status': show_status,
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.command not in command_map:
|
|
||||||
print(f"❌ Unknown command: {args.command}")
|
|
||||||
print("\nAvailable commands:")
|
|
||||||
for cmd in sorted(command_map.keys()):
|
|
||||||
print(f" {cmd}")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Execute the command
|
|
||||||
try:
|
|
||||||
return command_map[args.command](args.args, verbose=args.verbose, dry_run=args.dry_run)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("\n⚠️ Operation cancelled by user")
|
|
||||||
return 1
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Error running command '{args.command}': {e}")
|
|
||||||
if args.verbose:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return 1
|
|
||||||
|
|
||||||
|
|
||||||
def show_help(args, verbose=False, dry_run=False):
|
|
||||||
"""Show help message"""
|
|
||||||
print("""
|
|
||||||
SEO Automation CLI - Available Commands
|
|
||||||
|
|
||||||
Basic Commands:
|
|
||||||
export Export all posts from WordPress sites
|
|
||||||
analyze Analyze posts with AI for recommendations
|
|
||||||
recategorize Recategorize posts with AI suggestions
|
|
||||||
seo_check Check SEO quality of titles/descriptions
|
|
||||||
categories Manage categories across all sites
|
|
||||||
approve Review and approve recommendations
|
|
||||||
full_pipeline Run complete workflow: export → analyze → seo_check
|
|
||||||
|
|
||||||
Advanced Commands:
|
|
||||||
analytics <ga_file> <gsc_file> Import analytics data
|
|
||||||
gaps Analyze content gaps
|
|
||||||
opportunities Analyze keyword opportunities
|
|
||||||
report Generate SEO optimization report
|
|
||||||
status Show output files status
|
|
||||||
|
|
||||||
Utility:
|
|
||||||
help Show this help message
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
seo export
|
|
||||||
seo analyze
|
|
||||||
seo full_pipeline
|
|
||||||
seo analytics ga4.csv gsc.csv
|
|
||||||
""")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def export_posts(args, verbose=False, dry_run=False):
|
|
||||||
"""Export all posts from WordPress sites"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would export all posts from WordPress sites")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("📦 Exporting all posts from WordPress sites...")
|
|
||||||
exporter = PostExporter()
|
|
||||||
exporter.run()
|
|
||||||
print("✅ Export completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_posts(args, verbose=False, dry_run=False):
|
|
||||||
"""Analyze posts with AI for recommendations"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would analyze posts with AI for recommendations")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🤖 Analyzing posts with AI for recommendations...")
|
|
||||||
|
|
||||||
# Find the CSV file to analyze
|
|
||||||
csv_file = None
|
|
||||||
if args:
|
|
||||||
csv_file = args[0]
|
|
||||||
else:
|
|
||||||
# Find the latest exported CSV
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
csv_files = list(output_dir.glob('all_posts_*.csv'))
|
|
||||||
|
|
||||||
if not csv_files:
|
|
||||||
print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
|
|
||||||
print(" Usage: seo analyze <csv_file>")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
csv_file = str(max(csv_files, key=os.path.getctime))
|
|
||||||
|
|
||||||
print(f"Using file: {csv_file}")
|
|
||||||
|
|
||||||
analyzer = PostAnalyzer(csv_file)
|
|
||||||
analyzer.run()
|
|
||||||
print("✅ AI analysis completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def recategorize_posts(args, verbose=False, dry_run=False):
|
|
||||||
"""Recategorize posts with AI suggestions"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would recategorize posts with AI suggestions")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🏷️ Recategorizing posts with AI suggestions...")
|
|
||||||
|
|
||||||
# Find the CSV file to recategorize
|
|
||||||
csv_file = None
|
|
||||||
if args:
|
|
||||||
csv_file = args[0]
|
|
||||||
else:
|
|
||||||
# Find the latest exported CSV
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
csv_files = list(output_dir.glob('all_posts_*.csv'))
|
|
||||||
|
|
||||||
if not csv_files:
|
|
||||||
print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
|
|
||||||
print(" Usage: seo recategorize <csv_file>")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
csv_file = str(max(csv_files, key=os.path.getctime))
|
|
||||||
|
|
||||||
print(f"Using file: {csv_file}")
|
|
||||||
|
|
||||||
recategorizer = PostRecategorizer(csv_file)
|
|
||||||
recategorizer.run()
|
|
||||||
print("✅ Recategorization completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def seo_check(args, verbose=False, dry_run=False):
|
|
||||||
"""Check SEO quality of titles/descriptions"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would check SEO quality of titles/descriptions")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🔍 Checking SEO quality of titles/descriptions...")
|
|
||||||
|
|
||||||
# Parse optional arguments
|
|
||||||
top_n = 10 # Default
|
|
||||||
for arg in args:
|
|
||||||
if arg.startswith('--top-n=') or '=' in arg:
|
|
||||||
try:
|
|
||||||
top_n = int(arg.split('=')[1])
|
|
||||||
except ValueError:
|
|
||||||
print(f"❌ Invalid top-n value: {arg}")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
analyzer = MultiSiteSEOAnalyzer()
|
|
||||||
analyzer.run(use_ai=True, top_n=top_n)
|
|
||||||
print("✅ SEO check completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def manage_categories(args, verbose=False, dry_run=False):
|
|
||||||
"""Manage categories across all sites"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would manage categories across all sites")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🗂️ Managing categories across all sites...")
|
|
||||||
manager = CategoryManager()
|
|
||||||
manager.run()
|
|
||||||
print("✅ Category management completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def approve_recommendations(args, verbose=False, dry_run=False):
|
|
||||||
"""Review and approve recommendations"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would review and approve recommendations")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("✅ Reviewing and approving recommendations...")
|
|
||||||
|
|
||||||
# Use provided CSV files or find recommendation files
|
|
||||||
csv_files = []
|
|
||||||
|
|
||||||
if args:
|
|
||||||
# Use provided files
|
|
||||||
csv_files = [Path(f) for f in args if Path(f).exists()]
|
|
||||||
if not csv_files:
|
|
||||||
print("❌ None of the provided files exist.")
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
# Find recommendation files in output directory
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
|
|
||||||
# Look for common recommendation files
|
|
||||||
patterns = [
|
|
||||||
'category_assignments_*.csv',
|
|
||||||
'posts_with_ai_recommendations_*.csv',
|
|
||||||
'posts_to_move_*.csv',
|
|
||||||
'posts_to_consolidate_*.csv',
|
|
||||||
'posts_to_delete_*.csv'
|
|
||||||
]
|
|
||||||
|
|
||||||
for pattern in patterns:
|
|
||||||
csv_files.extend(output_dir.glob(pattern))
|
|
||||||
|
|
||||||
if not csv_files:
|
|
||||||
print("❌ No recommendation files found. Run 'seo analyze' or 'seo categories' first.")
|
|
||||||
print(" Or provide a CSV file: seo approve <file1.csv> [file2.csv] ...")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
print(f"Found {len(csv_files)} recommendation files to review:")
|
|
||||||
for csv_file in csv_files:
|
|
||||||
print(f" - {csv_file.name}")
|
|
||||||
|
|
||||||
approval_system = UserApprovalSystem()
|
|
||||||
approval_system.run_interactive_approval([str(f) for f in csv_files])
|
|
||||||
print("✅ Approval process completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def run_full_pipeline(args, verbose=False, dry_run=False):
|
|
||||||
"""Run complete workflow: export → analyze → seo_check"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would run full pipeline: export → analyze → seo_check")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🚀 Running full SEO automation pipeline...")
|
|
||||||
|
|
||||||
# Export
|
|
||||||
print("\n📦 Step 1/3: Exporting posts...")
|
|
||||||
exporter = PostExporter()
|
|
||||||
exporter.run()
|
|
||||||
|
|
||||||
# Analyze
|
|
||||||
print("\n🤖 Step 2/3: Analyzing with AI...")
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
csv_files = list(output_dir.glob('all_posts_*.csv'))
|
|
||||||
if csv_files:
|
|
||||||
latest_csv = max(csv_files, key=os.path.getctime)
|
|
||||||
analyzer = PostAnalyzer(str(latest_csv))
|
|
||||||
analyzer.run()
|
|
||||||
|
|
||||||
# SEO Check
|
|
||||||
print("\n🔍 Step 3/3: Checking SEO quality...")
|
|
||||||
seo_analyzer = MultiSiteSEOAnalyzer()
|
|
||||||
seo_analyzer.run(use_ai=True, top_n=10)
|
|
||||||
|
|
||||||
print("\n✅ Full pipeline completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def import_analytics(args, verbose=False, dry_run=False):
|
|
||||||
"""Import analytics data"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would import analytics data")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if len(args) < 2:
|
|
||||||
print("❌ Usage: seo analytics <ga_file> <gsc_file>")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
ga_file = args[0]
|
|
||||||
gsc_file = args[1]
|
|
||||||
|
|
||||||
# Find the latest exported posts CSV
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
posts_files = list(output_dir.glob('all_posts_*.csv'))
|
|
||||||
|
|
||||||
if not posts_files:
|
|
||||||
print("❌ No exported posts found. Run 'seo export' first.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
latest_posts = max(posts_files, key=os.path.getctime)
|
|
||||||
|
|
||||||
print(f"📊 Importing analytics data...")
|
|
||||||
print(f"GA4 file: {ga_file}")
|
|
||||||
print(f"GSC file: {gsc_file}")
|
|
||||||
print(f"Posts file: {latest_posts.name}")
|
|
||||||
|
|
||||||
importer = AnalyticsImporter()
|
|
||||||
importer.run(
|
|
||||||
ga_csv=Path(ga_file),
|
|
||||||
gsc_csv=Path(gsc_file),
|
|
||||||
posts_csv=latest_posts,
|
|
||||||
output_csv=output_dir / 'posts_with_analytics.csv'
|
|
||||||
)
|
|
||||||
|
|
||||||
print("✅ Analytics import completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_content_gaps(args, verbose=False, dry_run=False):
|
|
||||||
"""Analyze content gaps"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would analyze content gaps")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🕳️ Analyzing content gaps...")
|
|
||||||
|
|
||||||
# Find posts with analytics
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
posts_file = output_dir / 'results' / 'posts_with_analytics.csv'
|
|
||||||
|
|
||||||
if not posts_file.exists():
|
|
||||||
print("❌ Posts with analytics not found. Run 'seo analytics' first.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Find GSC queries
|
|
||||||
gsc_file = Path(__file__).parent / 'input' / 'analytics' / 'gsc' / 'Requêtes.csv'
|
|
||||||
if not gsc_file.exists():
|
|
||||||
gsc_file = output_dir / 'gsc_queries.csv' # fallback
|
|
||||||
|
|
||||||
if not gsc_file.exists():
|
|
||||||
print("❌ GSC queries file not found. Expected at input/analytics/gsc/Requêtes.csv")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
analyzer = ContentGapAnalyzer()
|
|
||||||
analyzer.run(
|
|
||||||
posts_csv=posts_file,
|
|
||||||
gsc_csv=gsc_file,
|
|
||||||
output_csv=output_dir / 'results' / 'content_gaps.csv'
|
|
||||||
)
|
|
||||||
|
|
||||||
print("✅ Content gap analysis completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_opportunities(args, verbose=False, dry_run=False):
|
|
||||||
"""Analyze keyword opportunities"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would analyze keyword opportunities")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("🎯 Analyzing keyword opportunities...")
|
|
||||||
|
|
||||||
# Find posts with analytics
|
|
||||||
output_dir = Path(__file__).parent / 'output' / 'results'
|
|
||||||
posts_file = output_dir / 'posts_with_analytics.csv'
|
|
||||||
|
|
||||||
if not posts_file.exists():
|
|
||||||
print("❌ Posts with analytics not found. Run 'seo analytics' first.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
analyzer = OpportunityAnalyzer()
|
|
||||||
analyzer.run(
|
|
||||||
posts_csv=posts_file,
|
|
||||||
output_csv=output_dir / 'keyword_opportunities.csv'
|
|
||||||
)
|
|
||||||
|
|
||||||
print("✅ Opportunity analysis completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def generate_report(args, verbose=False, dry_run=False):
|
|
||||||
"""Generate SEO optimization report"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would generate SEO optimization report")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("📋 Generating SEO optimization report...")
|
|
||||||
|
|
||||||
output_dir = Path(__file__).parent / 'output' / 'results'
|
|
||||||
posts_file = output_dir / 'posts_with_analytics.csv'
|
|
||||||
opportunities_file = output_dir / 'keyword_opportunities.csv'
|
|
||||||
gaps_file = output_dir / 'content_gaps.csv'
|
|
||||||
|
|
||||||
if not posts_file.exists():
|
|
||||||
print("❌ Posts with analytics not found. Run 'seo analytics' first.")
|
|
||||||
return 1
|
|
||||||
|
|
||||||
generator = ReportGenerator()
|
|
||||||
generator.run(
|
|
||||||
posts_csv=posts_file,
|
|
||||||
opportunities_csv=opportunities_file,
|
|
||||||
gaps_csv=gaps_file,
|
|
||||||
output_md=output_dir / 'seo_optimization_report.md',
|
|
||||||
output_prioritized_csv=output_dir / 'posts_prioritized.csv'
|
|
||||||
)
|
|
||||||
|
|
||||||
print("✅ Report generation completed!")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def show_status(args, verbose=False, dry_run=False):
|
|
||||||
"""Show output files status"""
|
|
||||||
if dry_run:
|
|
||||||
print("Would show output files status")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
print("📊 Output files status:")
|
|
||||||
|
|
||||||
output_dir = Path(__file__).parent / 'output'
|
|
||||||
if output_dir.exists():
|
|
||||||
files = list(output_dir.glob('*.csv'))
|
|
||||||
if files:
|
|
||||||
print(f"\nFound {len(files)} CSV files in output/:")
|
|
||||||
for file in sorted(files, key=os.path.getctime, reverse=True)[:10]: # Show latest 10
|
|
||||||
size = file.stat().st_size / 1024 # KB
|
|
||||||
mtime = file.stat().st_mtime
|
|
||||||
from datetime import datetime
|
|
||||||
date = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M')
|
|
||||||
print(f" {file.name} ({size:.1f}KB, {date})")
|
|
||||||
else:
|
|
||||||
print(" No CSV files found in output/")
|
|
||||||
else:
|
|
||||||
print(" output/ directory not found")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
# Import and run CLI
|
||||||
|
from seo.cli import main
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
37
setup.cfg
Normal file
37
setup.cfg
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
[metadata]
|
||||||
|
name = seo-automation
|
||||||
|
version = 1.0.0
|
||||||
|
description = WordPress SEO automation with AI-powered recommendations
|
||||||
|
long_description = file: README.md
|
||||||
|
long_description_content_type = text/markdown
|
||||||
|
license = MIT
|
||||||
|
author = SEO Automation Team
|
||||||
|
url = https://github.com/example/seo-automation
|
||||||
|
classifiers =
|
||||||
|
Development Status :: 4 - Beta
|
||||||
|
Intended Audience :: Developers
|
||||||
|
Topic :: Internet :: WWW/HTTP
|
||||||
|
License :: OSI Approved :: MIT License
|
||||||
|
Programming Language :: Python :: 3
|
||||||
|
Programming Language :: Python :: 3.8
|
||||||
|
Programming Language :: Python :: 3.9
|
||||||
|
Programming Language :: Python :: 3.10
|
||||||
|
Programming Language :: Python :: 3.11
|
||||||
|
Programming Language :: Python :: 3.12
|
||||||
|
|
||||||
|
[options]
|
||||||
|
package_dir =
|
||||||
|
= src
|
||||||
|
packages = find:
|
||||||
|
python_requires = >=3.8
|
||||||
|
install_requires =
|
||||||
|
requests>=2.31.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
PyYAML>=6.0
|
||||||
|
|
||||||
|
[options.packages.find]
|
||||||
|
where = src
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
seo = seo.cli:main
|
||||||
55
setup.py
Normal file
55
setup.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Setup script for SEO Automation Tool
|
||||||
|
"""
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Read README for long description
|
||||||
|
readme_path = Path(__file__).parent / 'README.md'
|
||||||
|
long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else ''
|
||||||
|
|
||||||
|
# Read requirements
|
||||||
|
requirements_path = Path(__file__).parent / 'requirements.txt'
|
||||||
|
requirements = [
|
||||||
|
line.strip()
|
||||||
|
for line in requirements_path.read_text().splitlines()
|
||||||
|
if line.strip() and not line.startswith('#')
|
||||||
|
] if requirements_path.exists() else []
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='seo-automation',
|
||||||
|
version='1.0.0',
|
||||||
|
author='SEO Automation Team',
|
||||||
|
author_email='seo@example.com',
|
||||||
|
description='WordPress SEO automation with AI-powered recommendations',
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type='text/markdown',
|
||||||
|
url='https://github.com/example/seo-automation',
|
||||||
|
packages=find_packages(where='src'),
|
||||||
|
package_dir={'': 'src'},
|
||||||
|
classifiers=[
|
||||||
|
'Development Status :: 4 - Beta',
|
||||||
|
'Intended Audience :: Developers',
|
||||||
|
'Topic :: Internet :: WWW/HTTP',
|
||||||
|
'License :: OSI Approved :: MIT License',
|
||||||
|
'Programming Language :: Python :: 3',
|
||||||
|
'Programming Language :: Python :: 3.8',
|
||||||
|
'Programming Language :: Python :: 3.9',
|
||||||
|
'Programming Language :: Python :: 3.10',
|
||||||
|
'Programming Language :: Python :: 3.11',
|
||||||
|
'Programming Language :: Python :: 3.12',
|
||||||
|
],
|
||||||
|
python_requires='>=3.8',
|
||||||
|
install_requires=requirements,
|
||||||
|
entry_points={
|
||||||
|
'console_scripts': [
|
||||||
|
'seo=seo.cli:main',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
include_package_data=True,
|
||||||
|
package_data={
|
||||||
|
'seo': ['py.typed'],
|
||||||
|
},
|
||||||
|
)
|
||||||
7
src/seo/__init__.py
Normal file
7
src/seo/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""
|
||||||
|
SEO Automation Tool - Integrated Application
|
||||||
|
A comprehensive WordPress SEO automation suite.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = '1.0.0'
|
||||||
|
__author__ = 'SEO Automation Team'
|
||||||
14
src/seo/analyzer.py
Normal file
14
src/seo/analyzer.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
Analyzer Module - AI-powered post analysis
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Import from scripts directory
|
||||||
|
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from ai_analyze_posts_for_decisions import PostAnalyzer
|
||||||
|
|
||||||
|
__all__ = ['PostAnalyzer']
|
||||||
255
src/seo/app.py
Normal file
255
src/seo/app.py
Normal file
@@ -0,0 +1,255 @@
|
|||||||
|
"""
|
||||||
|
SEO Application Core - Integrated SEO automation functionality
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from .exporter import PostExporter
|
||||||
|
from .analyzer import PostAnalyzer
|
||||||
|
from .recategorizer import PostRecategorizer
|
||||||
|
from .seo_checker import MultiSiteSEOAnalyzer
|
||||||
|
from .categories import CategoryManager
|
||||||
|
from .approval import UserApprovalSystem
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SEOApp:
|
||||||
|
"""
|
||||||
|
Main SEO Application class.
|
||||||
|
|
||||||
|
Provides a unified interface for all SEO automation tasks.
|
||||||
|
Inspired by Ruby on Rails' Active Record pattern.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
app = SEOApp()
|
||||||
|
app.export()
|
||||||
|
app.analyze()
|
||||||
|
app.seo_check()
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, verbose: bool = False):
|
||||||
|
"""
|
||||||
|
Initialize the SEO application.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
verbose: Enable verbose logging
|
||||||
|
"""
|
||||||
|
self.verbose = verbose
|
||||||
|
self.output_dir = Path(__file__).parent.parent.parent / 'output'
|
||||||
|
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Initialize components
|
||||||
|
self.exporter = None
|
||||||
|
self.analyzer = None
|
||||||
|
self.recategorizer = None
|
||||||
|
self.seo_checker = None
|
||||||
|
self.category_manager = None
|
||||||
|
self.approval_system = None
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
else:
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
def export(self) -> str:
|
||||||
|
"""
|
||||||
|
Export all posts from WordPress sites.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to exported CSV file
|
||||||
|
"""
|
||||||
|
logger.info("📦 Exporting all posts from WordPress sites...")
|
||||||
|
self.exporter = PostExporter()
|
||||||
|
self.exporter.run()
|
||||||
|
|
||||||
|
# Get the exported file path
|
||||||
|
date_str = datetime.now().strftime('%Y-%m-%d')
|
||||||
|
csv_file = self.output_dir / f'all_posts_{date_str}.csv'
|
||||||
|
|
||||||
|
logger.info(f"✅ Export completed: {csv_file}")
|
||||||
|
return str(csv_file)
|
||||||
|
|
||||||
|
def analyze(self, csv_file: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Analyze posts with AI for recommendations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
csv_file: Path to CSV file (uses latest export if not provided)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to analysis results
|
||||||
|
"""
|
||||||
|
logger.info("🤖 Analyzing posts with AI for recommendations...")
|
||||||
|
|
||||||
|
# Find CSV file
|
||||||
|
if not csv_file:
|
||||||
|
csv_file = self._find_latest_export()
|
||||||
|
|
||||||
|
if not csv_file:
|
||||||
|
raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
|
||||||
|
|
||||||
|
logger.info(f"Using file: {csv_file}")
|
||||||
|
|
||||||
|
# Run analysis
|
||||||
|
self.analyzer = PostAnalyzer(csv_file)
|
||||||
|
self.analyzer.run()
|
||||||
|
|
||||||
|
logger.info("✅ AI analysis completed!")
|
||||||
|
return csv_file
|
||||||
|
|
||||||
|
def recategorize(self, csv_file: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Recategorize posts with AI suggestions.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
csv_file: Path to CSV file (uses latest export if not provided)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to recategorization results
|
||||||
|
"""
|
||||||
|
logger.info("🏷️ Recategorizing posts with AI suggestions...")
|
||||||
|
|
||||||
|
# Find CSV file
|
||||||
|
if not csv_file:
|
||||||
|
csv_file = self._find_latest_export()
|
||||||
|
|
||||||
|
if not csv_file:
|
||||||
|
raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
|
||||||
|
|
||||||
|
logger.info(f"Using file: {csv_file}")
|
||||||
|
|
||||||
|
# Run recategorization
|
||||||
|
self.recategorizer = PostRecategorizer(csv_file)
|
||||||
|
self.recategorizer.run()
|
||||||
|
|
||||||
|
logger.info("✅ Recategorization completed!")
|
||||||
|
return csv_file
|
||||||
|
|
||||||
|
def seo_check(self, top_n: int = 10) -> None:
|
||||||
|
"""
|
||||||
|
Check SEO quality of titles and descriptions.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
top_n: Number of top posts to get AI recommendations for
|
||||||
|
"""
|
||||||
|
logger.info("🔍 Checking SEO quality of titles/descriptions...")
|
||||||
|
|
||||||
|
self.seo_checker = MultiSiteSEOAnalyzer()
|
||||||
|
self.seo_checker.run(use_ai=True, top_n=top_n)
|
||||||
|
|
||||||
|
logger.info("✅ SEO check completed!")
|
||||||
|
|
||||||
|
def categories(self) -> None:
|
||||||
|
"""Manage categories across all sites."""
|
||||||
|
logger.info("🗂️ Managing categories across all sites...")
|
||||||
|
|
||||||
|
self.category_manager = CategoryManager()
|
||||||
|
self.category_manager.run()
|
||||||
|
|
||||||
|
logger.info("✅ Category management completed!")
|
||||||
|
|
||||||
|
def approve(self, files: Optional[List[str]] = None) -> None:
|
||||||
|
"""
|
||||||
|
Review and approve recommendations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
files: List of CSV files to review (auto-detects if not provided)
|
||||||
|
"""
|
||||||
|
logger.info("✅ Reviewing and approving recommendations...")
|
||||||
|
|
||||||
|
self.approval_system = UserApprovalSystem()
|
||||||
|
|
||||||
|
if not files:
|
||||||
|
# Auto-detect recommendation files
|
||||||
|
files = self._find_recommendation_files()
|
||||||
|
|
||||||
|
if not files:
|
||||||
|
raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")
|
||||||
|
|
||||||
|
logger.info(f"Found {len(files)} recommendation files to review")
|
||||||
|
self.approval_system.run_interactive_approval(files)
|
||||||
|
|
||||||
|
logger.info("✅ Approval process completed!")
|
||||||
|
|
||||||
|
def full_pipeline(self) -> None:
    """Run the complete workflow: export → analyze → seo_check."""
    logger.info("🚀 Running full SEO automation pipeline...")

    # Steps run strictly in order; each banner mirrors the step it starts.
    steps = (
        ("\n📦 Step 1/3: Exporting posts...", self.export),
        ("\n🤖 Step 2/3: Analyzing with AI...", self.analyze),
        ("\n🔍 Step 3/3: Checking SEO quality...", self.seo_check),
    )
    for banner, step in steps:
        logger.info(banner)
        step()

    logger.info("\n✅ Full pipeline completed!")
|
||||||
|
|
||||||
|
def _find_latest_export(self) -> Optional[str]:
    """Locate the most recent exported posts CSV.

    Returns:
        Path to the newest ``all_posts_*.csv`` in the output directory,
        or None when no export exists yet.
    """
    csv_files = list(self.output_dir.glob('all_posts_*.csv'))

    if not csv_files:
        return None

    # Use st_mtime: on POSIX st_ctime is inode-change time (bumped by
    # chmod/rename), not creation time, so it can misreport "latest".
    # st_mtime also matches the timestamp shown by status().
    latest = max(csv_files, key=lambda f: f.stat().st_mtime)
    return str(latest)
|
||||||
|
|
||||||
|
def _find_recommendation_files(self) -> List[str]:
    """Collect recommendation CSV paths from the output directory.

    Returns:
        String paths of every recommendation file found, grouped by
        pattern in the order listed below.
    """
    patterns = (
        'category_assignments_*.csv',
        'posts_with_ai_recommendations_*.csv',
        'posts_to_move_*.csv',
        'posts_to_consolidate_*.csv',
        'posts_to_delete_*.csv',
    )
    return [
        str(path)
        for pattern in patterns
        for path in self.output_dir.glob(pattern)
    ]
|
||||||
|
|
||||||
|
def status(self) -> dict:
    """Summarize the CSV files currently in the output directory.

    Returns:
        Dict with ``total_files`` and up to 10 most recent ``files``
        entries, each holding name, size in KB and modification time.
    """
    files = list(self.output_dir.glob('*.csv'))

    status_info = {
        'total_files': len(files),
        'files': []
    }

    # Sort by st_mtime so the ordering matches the 'modified' timestamp
    # reported below (the original sorted by st_ctime, which is
    # inode-change time on POSIX and can disagree with mtime).
    for file in sorted(files, key=lambda f: f.stat().st_mtime, reverse=True)[:10]:
        status_info['files'].append({
            'name': file.name,
            'size_kb': file.stat().st_size / 1024,
            'modified': datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M')
        })

    return status_info
|
||||||
14
src/seo/approval.py
Normal file
14
src/seo/approval.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
Approval System Module - User approval for recommendations
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Import from scripts directory
|
||||||
|
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from user_approval import UserApprovalSystem
|
||||||
|
|
||||||
|
__all__ = ['UserApprovalSystem']
|
||||||
14
src/seo/categories.py
Normal file
14
src/seo/categories.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
Category Manager Module - Category management across sites
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Import from scripts directory
|
||||||
|
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from category_manager import CategoryManager
|
||||||
|
|
||||||
|
__all__ = ['CategoryManager']
|
||||||
223
src/seo/cli.py
Normal file
223
src/seo/cli.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
#!/usr/bin/env python3
"""
SEO Automation CLI - Main entry point
Unified command-line interface for SEO automation.
"""

import sys
import argparse
from pathlib import Path

# Make the package importable when this file is run directly from a
# checkout: cli.py lives in src/seo/, so the importable root is src/
# (the original computed Path(__file__).parent / 'src', i.e. the
# nonexistent src/seo/src, which made direct execution fail).
src_dir = Path(__file__).parent.parent
sys.path.insert(0, str(src_dir))

from seo.app import SEOApp
from seo.config import Config
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments, validate config, dispatch.

    Returns:
        Process exit code (0 on success, 1 on any failure).
    """
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  seo export                Export all posts from WordPress sites
  seo analyze               Analyze posts with AI for recommendations
  seo analyze posts.csv     Analyze specific CSV file
  seo recategorize          Recategorize posts with AI
  seo seo_check             Check SEO quality of titles/descriptions
  seo categories            Manage categories across sites
  seo approve               Review and approve recommendations
  seo full_pipeline         Run complete workflow: export → analyze → seo_check
  seo status                Show output files status
"""
    )

    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
    parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 0

    # Command dispatch table: each handler takes (app, args) -> exit code.
    commands = {
        'export': cmd_export,
        'analyze': cmd_analyze,
        'recategorize': cmd_recategorize,
        'seo_check': cmd_seo_check,
        'categories': cmd_categories,
        'approve': cmd_approve,
        'full_pipeline': cmd_full_pipeline,
        'status': cmd_status,
        'help': cmd_help,
    }

    # Report unknown commands before validating configuration so users
    # get a useful error even with an unconfigured environment.
    if args.command not in commands:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(commands.keys()):
            print(f"  {cmd}")
        return 1

    # 'help', 'status' and --dry-run never touch WordPress or the AI API,
    # so they must work without credentials (the original validated
    # unconditionally, making even `seo help` fail on a fresh checkout).
    if args.command not in ('help', 'status') and not args.dry_run:
        try:
            Config.validate()
        except ValueError as e:
            print(f"❌ Configuration error: {e}")
            return 1

    app = SEOApp(verbose=args.verbose)

    try:
        return commands[args.command](app, args)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        return 1
    except FileNotFoundError as e:
        print(f"❌ File not found: {e}")
        return 1
    except Exception as e:
        print(f"❌ Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_export(app, args):
    """Export all posts (or describe the action under --dry-run)."""
    if args.dry_run:
        print("Would export all posts from WordPress sites")
    else:
        app.export()
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_analyze(app, args):
    """Analyze posts with AI; first positional arg is an optional CSV."""
    if args.dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0

    target = next(iter(args.args), None)
    app.analyze(target)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_recategorize(app, args):
    """Recategorize posts with AI; first positional arg is an optional CSV."""
    if args.dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0

    target = next(iter(args.args), None)
    app.recategorize(target)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_seo_check(app, args):
    """Check SEO quality for the --top-n most relevant posts."""
    if args.dry_run:
        print("Would check SEO quality of titles/descriptions")
    else:
        app.seo_check(top_n=args.top_n)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_categories(app, args):
    """Run category management across all sites."""
    if args.dry_run:
        print("Would manage categories across all sites")
    else:
        app.categories()
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_approve(app, args):
    """Review recommendations; positional args are explicit CSV files."""
    if args.dry_run:
        print("Would review and approve recommendations")
        return 0

    # Empty positional list means "let the app auto-detect".
    app.approve(args.args or None)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_full_pipeline(app, args):
    """Run the complete export → analyze → seo_check workflow."""
    if args.dry_run:
        print("Would run full pipeline: export → analyze → seo_check")
    else:
        app.full_pipeline()
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_status(app, args):
    """Print a short listing of the CSV files in output/."""
    if args.dry_run:
        print("Would show output files status")
        return 0

    report = app.status()

    print("📊 Output files status:")
    if report['total_files'] > 0:
        print(f"\nFound {report['total_files']} CSV files in output/:")
        for entry in report['files']:
            print(f"  {entry['name']} ({entry['size_kb']:.1f}KB, {entry['modified']})")
    else:
        print("  No CSV files found in output/")

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_help(app, args):
    """Print the command reference and return exit code 0."""
    help_text = """
SEO Automation CLI - Available Commands

Basic Commands:
  export                  Export all posts from WordPress sites
  analyze [csv_file]      Analyze posts with AI (optional CSV input)
  recategorize [csv_file] Recategorize posts with AI (optional CSV input)
  seo_check               Check SEO quality of titles/descriptions
  categories              Manage categories across all sites
  approve [files...]      Review and approve recommendations
  full_pipeline           Run complete workflow: export → analyze → seo_check

Utility:
  status                  Show output files status
  help                    Show this help message

Options:
  --verbose, -v           Enable verbose logging
  --dry-run               Show what would be done without doing it
  --top-n N               Number of top posts for AI analysis (default: 10)

Examples:
  seo export
  seo analyze
  seo analyze output/all_posts_2026-02-16.csv
  seo approve output/category_assignments_*.csv
  seo full_pipeline
  seo status
"""
    print(help_text)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
||||||
70
src/seo/config.py
Normal file
70
src/seo/config.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Configuration module for SEO application.
|
||||||
|
Loads configuration from environment variables and YAML.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
    """Configuration for SEO automation.

    Each setting resolves with precedence: environment variable first,
    then config.yaml, then a built-in default. Everything is resolved
    once at class-creation (import) time.
    """

    # config.yaml lives at the project root. This file is src/seo/config.py,
    # so the root is three parents up; the original two-parent path pointed
    # at src/ (where no config.yaml exists, and where sibling modules never
    # look — they all use parent.parent.parent for scripts/ and output/).
    CONFIG_FILE = Path(__file__).parent.parent.parent / 'config.yaml'

    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # safe_load() returns None for an empty file; normalize to {}
            # so the .get() chains below never fail on NoneType.
            YAML_CONFIG = yaml.safe_load(f) or {}
    else:
        YAML_CONFIG = {}

    # WordPress settings (primary site)
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))

    # Multi-site configuration: per-site env vars win; the shared
    # WORDPRESS_USERNAME / WORDPRESS_APP_PASSWORD are the fallback.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        }
    }

    # OpenRouter API settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))

    @classmethod
    def validate(cls):
        """Check that every required setting is present.

        Returns:
            True when the configuration is complete.

        Raises:
            ValueError: Listing every missing required setting.
        """
        errors = []

        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required")

        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f"  - {e}" for e in errors))

        return True
|
||||||
226
src/seo/exporter.py
Normal file
226
src/seo/exporter.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
"""
|
||||||
|
Post Exporter Module - Export posts from WordPress sites
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
import requests
|
||||||
|
from requests.auth import HTTPBasicAuth
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .config import Config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PostExporter:
    """Export posts from all configured WordPress sites to a single CSV.

    Workflow (see run()): fetch category names per site, fetch published
    and draft posts via the WP REST API, flatten each post into a row and
    write output/all_posts_<date>.csv.
    """

    def __init__(self):
        """Initialize with the configured sites and empty caches."""
        self.sites = Config.WORDPRESS_SITES
        self.all_posts = []        # flattened rows ready for CSV export
        self.category_cache = {}   # site name -> {cat id: {name, slug}}

    def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict]:
        """Fetch id -> {'name', 'slug'} category mappings for a site (cached).

        Paginates through /wp-json/wp/v2/categories: the WP REST API caps
        per_page at 100, so the original single request silently dropped
        categories on sites with more than 100 of them. (The original
        annotation also claimed Dict[int, str] while storing dicts.)
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]

        logger.info(f"  Fetching categories from {site_name}...")
        categories = {}
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        try:
            page = 1
            while True:
                response = requests.get(
                    api_url,
                    params={'per_page': 100, 'page': page},
                    auth=auth,
                    timeout=10,
                )
                response.raise_for_status()
                batch = response.json()
                if not batch:
                    break
                for cat in batch:
                    categories[cat['id']] = {'name': cat.get('name', ''), 'slug': cat.get('slug', '')}
                if len(batch) < 100:
                    break  # short page == last page
                page += 1
            logger.info(f"  ✓ Fetched {len(categories)} categories")
        except Exception as e:
            # Best effort: posts are still exported with raw category ids.
            logger.warning(f"  Could not fetch categories from {site_name}: {e}")

        self.category_cache[site_name] = categories
        return categories

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """Fetch all published and draft posts from one WordPress site."""
        logger.info(f"\nFetching posts from {site_name}...")

        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        for status in ['publish', 'draft']:
            page = 1
            status_count = 0

            while True:
                try:
                    logger.info(f"  Fetching page {page} ({status} posts)...")
                    response = requests.get(
                        api_url,
                        params={'page': page, 'per_page': 100, 'status': status},
                        auth=auth,
                        timeout=10
                    )
                    response.raise_for_status()

                    page_posts = response.json()
                    if not page_posts:
                        break

                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f"  ✓ Got {len(page_posts)} posts (total: {len(posts)})")

                    page += 1
                    time.sleep(0.5)  # throttle to be polite to the remote API

                except requests.exceptions.HTTPError as e:
                    # WP answers 400 when `page` runs past the last page.
                    if response.status_code == 400:
                        logger.info(f"  ℹ API limit reached (got {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error on page {page}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

        logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
        return posts

    def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict:
        """Flatten a raw WP REST post object into one CSV row dict.

        Args:
            post: Raw post JSON from the REST API.
            site_name: Site the post belongs to.
            category_map: id -> {'name', 'slug'} from fetch_category_names.
        """
        # title/content/excerpt arrive as {'rendered': ...} dicts in REST
        # responses; tolerate plain strings as well.
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')

        content = post.get('content', {})
        if isinstance(content, dict):
            content = content.get('rendered', '')
        # Crude tag strip + 500-char preview (deliberately not a full parser).
        content_text = re.sub('<[^<]+?>', '', content)[:500]

        excerpt = post.get('excerpt', {})
        if isinstance(excerpt, dict):
            excerpt = excerpt.get('rendered', '')
        excerpt_text = re.sub('<[^<]+?>', '', excerpt)

        meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}
        # Prefer Rank Math, fall back to Yoast.
        meta_description = (
            meta_dict.get('rank_math_description', '') or
            meta_dict.get('_yoast_wpseo_metadesc', '') or ''
        )

        category_ids = post.get('categories', [])
        # Unknown ids degrade to the raw id string.
        category_names = ', '.join([
            category_map.get(cat_id, {}).get('name', str(cat_id))
            for cat_id in category_ids
        ]) if category_ids else ''

        return {
            'site': site_name,
            'post_id': post['id'],
            'status': post.get('status', 'publish'),
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'author_id': post.get('author', ''),
            'date_published': post.get('date', ''),
            'date_modified': post.get('modified', ''),
            'categories': category_names,
            'tags': ', '.join([str(t) for t in post.get('tags', [])]),
            'excerpt': excerpt_text.strip(),
            'content_preview': content_text.strip(),
            'seo_title': meta_dict.get('rank_math_title', ''),
            'meta_description': meta_description,
            'focus_keyword': meta_dict.get('rank_math_focus_keyword', ''),
            'word_count': len(content_text.split()),
        }

    def export_to_csv(self, output_file: Optional[str] = None) -> Optional[str]:
        """Write all collected posts to CSV.

        Returns:
            The output path as a string, or None when there is nothing to
            export (the original annotation claimed a plain str).
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            date_str = datetime.now().strftime('%Y-%m-%d')
            output_file = output_dir / f'all_posts_{date_str}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.all_posts:
            logger.error("No posts to export")
            return None

        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
            'date_published', 'date_modified', 'categories', 'tags', 'excerpt',
            'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count',
        ]

        logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.all_posts)

        logger.info(f"✓ CSV exported to: {output_file}")
        return str(output_file)

    def run(self):
        """Fetch posts from every configured site and export them to CSV."""
        logger.info("="*70)
        logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))

        for site_name, config in self.sites.items():
            categories = self.fetch_category_names(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)

            if posts:
                for post in posts:
                    post_details = self.extract_post_details(post, site_name, categories)
                    self.all_posts.append(post_details)

        if not self.all_posts:
            logger.error("No posts found on any site")
            return

        # Stable ordering makes diffs between successive exports meaningful.
        self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
        self.export_to_csv()

        self._log_summary()

    def _log_summary(self):
        """Log per-site published/draft counts for the finished export."""
        logger.info("\n" + "="*70)
        logger.info("EXPORT SUMMARY")
        logger.info("="*70)

        by_site = {}
        for post in self.all_posts:
            site = post['site']
            if site not in by_site:
                by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
            by_site[site]['total'] += 1
            if post['status'] == 'publish':
                by_site[site]['published'] += 1
            else:
                by_site[site]['draft'] += 1

        for site, stats in sorted(by_site.items()):
            logger.info(f"\n{site}:")
            logger.info(f"  Total: {stats['total']}")
            logger.info(f"  Published: {stats['published']}")
            logger.info(f"  Drafts: {stats['draft']}")

        logger.info("\n✓ Export complete!")
|
||||||
14
src/seo/recategorizer.py
Normal file
14
src/seo/recategorizer.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
Recategorizer Module - AI-powered post recategorization
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Import from scripts directory
|
||||||
|
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from ai_recategorize_posts import PostRecategorizer
|
||||||
|
|
||||||
|
__all__ = ['PostRecategorizer']
|
||||||
14
src/seo/seo_checker.py
Normal file
14
src/seo/seo_checker.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
SEO Checker Module - SEO quality analysis
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Import from scripts directory
|
||||||
|
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
|
||||||
|
|
||||||
|
__all__ = ['MultiSiteSEOAnalyzer']
|
||||||
Reference in New Issue
Block a user