diff --git a/README.md b/README.md index 7c2b3f0..1df0734 100644 --- a/README.md +++ b/README.md @@ -193,23 +193,39 @@ The SEO automation tool provides a simple, intuitive CLI inspired by Ruby on Rai ``` seo/ -├── seo # Main CLI application (executable) -├── scripts/ -│ ├── config.py # Configuration loader -│ ├── export_posts_for_ai_decision.py -│ ├── ai_analyze_posts_for_decisions.py -│ ├── multi_site_seo_analyzer.py -│ ├── category_manager.py # Category management -│ ├── user_approval.py # Approval system -│ └── ... +├── seo # Main CLI executable +├── src/seo/ # Integrated application package +│ ├── __init__.py # Package initialization +│ ├── cli.py # Command-line interface +│ ├── app.py # Main application class +│ ├── config.py # Configuration management +│ ├── exporter.py # Post export functionality +│ ├── analyzer.py # AI analysis functionality +│ ├── recategorizer.py # Recategorization functionality +│ ├── seo_checker.py # SEO quality checking +│ ├── categories.py # Category management +│ └── approval.py # User approval system +├── scripts/ # Legacy scripts (deprecated) ├── config.yaml # YAML configuration ├── .env # Environment variables ├── .env.example # Template ├── requirements.txt # Dependencies -├── output/reports/ # Generated CSV files +├── output/ # Generated files └── README.md ``` +## 📦 Installation + +For development/installation: + +```bash +# Install in development mode +pip install -e . + +# Or just use the executable directly +./seo help +``` + ## 🎯 Typical Workflow 1. **Export posts** from all sites: diff --git a/seo b/seo index bc8bccc..14094e6 100755 --- a/seo +++ b/seo @@ -1,493 +1,18 @@ #!/usr/bin/env python3 """ -SEO Automation CLI - Inspired by Ruby on Rails CLI -Simple, intuitive commands for managing WordPress SEO +SEO Automation CLI - Main executable +Entry point for the SEO automation tool. 
""" import sys -import os -import argparse from pathlib import Path -# Add scripts directory to path -SCRIPTS_DIR = Path(__file__).parent / 'scripts' -sys.path.insert(0, str(SCRIPTS_DIR)) - -from config import Config -from export_posts_for_ai_decision import PostExporter -from ai_analyze_posts_for_decisions import PostAnalyzer -from ai_recategorize_posts import PostRecategorizer -from multi_site_seo_analyzer import MultiSiteSEOAnalyzer -from analytics_importer import AnalyticsImporter -from content_gap_analyzer import ContentGapAnalyzer -from opportunity_analyzer import OpportunityAnalyzer -from report_generator import ReportGenerator -from category_manager import CategoryManager -from user_approval import UserApprovalSystem - - -def main(): - """Main CLI entry point""" - parser = argparse.ArgumentParser( - prog='seo', - description='SEO Automation CLI - Manage WordPress SEO with AI', - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - seo help # Show this help - seo export # Export all posts from WordPress sites - seo analyze # Analyze posts with AI for recommendations - seo recategorize # Recategorize posts with AI - seo seo_check # Check SEO quality of titles/descriptions - seo categories # Manage categories across sites - seo approve # Review and approve recommendations - seo full_pipeline # Run complete workflow: export → analyze → seo_check - seo analytics ga4.csv gsc.csv # Import analytics data - """ - ) - - parser.add_argument('command', nargs='?', help='Command to run') - parser.add_argument('args', nargs='*', help='Arguments for the command') - - # Global options - parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') - parser.add_argument('--dry-run', action='store_true', help='Show what would be done without doing it') - - args = parser.parse_args() - - if not args.command: - parser.print_help() - return 0 - - # Validate configuration - try: - Config.validate() - except ValueError as e: - 
print(f"❌ Configuration error: {e}") - return 1 - - # Route to appropriate command - command_map = { - 'help': show_help, - 'export': export_posts, - 'analyze': analyze_posts, - 'recategorize': recategorize_posts, - 'seo_check': seo_check, - 'categories': manage_categories, - 'approve': approve_recommendations, - 'full_pipeline': run_full_pipeline, - 'analytics': import_analytics, - 'gaps': analyze_content_gaps, - 'opportunities': analyze_opportunities, - 'report': generate_report, - 'status': show_status, - } - - if args.command not in command_map: - print(f"❌ Unknown command: {args.command}") - print("\nAvailable commands:") - for cmd in sorted(command_map.keys()): - print(f" {cmd}") - return 1 - - # Execute the command - try: - return command_map[args.command](args.args, verbose=args.verbose, dry_run=args.dry_run) - except KeyboardInterrupt: - print("\n⚠️ Operation cancelled by user") - return 1 - except Exception as e: - print(f"❌ Error running command '{args.command}': {e}") - if args.verbose: - import traceback - traceback.print_exc() - return 1 - - -def show_help(args, verbose=False, dry_run=False): - """Show help message""" - print(""" -SEO Automation CLI - Available Commands - -Basic Commands: - export Export all posts from WordPress sites - analyze Analyze posts with AI for recommendations - recategorize Recategorize posts with AI suggestions - seo_check Check SEO quality of titles/descriptions - categories Manage categories across all sites - approve Review and approve recommendations - full_pipeline Run complete workflow: export → analyze → seo_check - -Advanced Commands: - analytics Import analytics data - gaps Analyze content gaps - opportunities Analyze keyword opportunities - report Generate SEO optimization report - status Show output files status - -Utility: - help Show this help message - -Examples: - seo export - seo analyze - seo full_pipeline - seo analytics ga4.csv gsc.csv - """) - return 0 - - -def export_posts(args, verbose=False, 
dry_run=False): - """Export all posts from WordPress sites""" - if dry_run: - print("Would export all posts from WordPress sites") - return 0 - - print("📦 Exporting all posts from WordPress sites...") - exporter = PostExporter() - exporter.run() - print("✅ Export completed!") - return 0 - - -def analyze_posts(args, verbose=False, dry_run=False): - """Analyze posts with AI for recommendations""" - if dry_run: - print("Would analyze posts with AI for recommendations") - return 0 - - print("🤖 Analyzing posts with AI for recommendations...") - - # Find the CSV file to analyze - csv_file = None - if args: - csv_file = args[0] - else: - # Find the latest exported CSV - output_dir = Path(__file__).parent / 'output' - csv_files = list(output_dir.glob('all_posts_*.csv')) - - if not csv_files: - print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.") - print(" Usage: seo analyze ") - return 1 - - csv_file = str(max(csv_files, key=os.path.getctime)) - - print(f"Using file: {csv_file}") - - analyzer = PostAnalyzer(csv_file) - analyzer.run() - print("✅ AI analysis completed!") - return 0 - - -def recategorize_posts(args, verbose=False, dry_run=False): - """Recategorize posts with AI suggestions""" - if dry_run: - print("Would recategorize posts with AI suggestions") - return 0 - - print("🏷️ Recategorizing posts with AI suggestions...") - - # Find the CSV file to recategorize - csv_file = None - if args: - csv_file = args[0] - else: - # Find the latest exported CSV - output_dir = Path(__file__).parent / 'output' - csv_files = list(output_dir.glob('all_posts_*.csv')) - - if not csv_files: - print("❌ No exported posts found. 
Run 'seo export' first or provide a CSV file.") - print(" Usage: seo recategorize ") - return 1 - - csv_file = str(max(csv_files, key=os.path.getctime)) - - print(f"Using file: {csv_file}") - - recategorizer = PostRecategorizer(csv_file) - recategorizer.run() - print("✅ Recategorization completed!") - return 0 - - -def seo_check(args, verbose=False, dry_run=False): - """Check SEO quality of titles/descriptions""" - if dry_run: - print("Would check SEO quality of titles/descriptions") - return 0 - - print("🔍 Checking SEO quality of titles/descriptions...") - - # Parse optional arguments - top_n = 10 # Default - for arg in args: - if arg.startswith('--top-n=') or '=' in arg: - try: - top_n = int(arg.split('=')[1]) - except ValueError: - print(f"❌ Invalid top-n value: {arg}") - return 1 - - analyzer = MultiSiteSEOAnalyzer() - analyzer.run(use_ai=True, top_n=top_n) - print("✅ SEO check completed!") - return 0 - - -def manage_categories(args, verbose=False, dry_run=False): - """Manage categories across all sites""" - if dry_run: - print("Would manage categories across all sites") - return 0 - - print("🗂️ Managing categories across all sites...") - manager = CategoryManager() - manager.run() - print("✅ Category management completed!") - return 0 - - -def approve_recommendations(args, verbose=False, dry_run=False): - """Review and approve recommendations""" - if dry_run: - print("Would review and approve recommendations") - return 0 - - print("✅ Reviewing and approving recommendations...") - - # Use provided CSV files or find recommendation files - csv_files = [] - - if args: - # Use provided files - csv_files = [Path(f) for f in args if Path(f).exists()] - if not csv_files: - print("❌ None of the provided files exist.") - return 1 - else: - # Find recommendation files in output directory - output_dir = Path(__file__).parent / 'output' - - # Look for common recommendation files - patterns = [ - 'category_assignments_*.csv', - 'posts_with_ai_recommendations_*.csv', - 
'posts_to_move_*.csv', - 'posts_to_consolidate_*.csv', - 'posts_to_delete_*.csv' - ] - - for pattern in patterns: - csv_files.extend(output_dir.glob(pattern)) - - if not csv_files: - print("❌ No recommendation files found. Run 'seo analyze' or 'seo categories' first.") - print(" Or provide a CSV file: seo approve [file2.csv] ...") - return 1 - - print(f"Found {len(csv_files)} recommendation files to review:") - for csv_file in csv_files: - print(f" - {csv_file.name}") - - approval_system = UserApprovalSystem() - approval_system.run_interactive_approval([str(f) for f in csv_files]) - print("✅ Approval process completed!") - return 0 - - -def run_full_pipeline(args, verbose=False, dry_run=False): - """Run complete workflow: export → analyze → seo_check""" - if dry_run: - print("Would run full pipeline: export → analyze → seo_check") - return 0 - - print("🚀 Running full SEO automation pipeline...") - - # Export - print("\n📦 Step 1/3: Exporting posts...") - exporter = PostExporter() - exporter.run() - - # Analyze - print("\n🤖 Step 2/3: Analyzing with AI...") - output_dir = Path(__file__).parent / 'output' - csv_files = list(output_dir.glob('all_posts_*.csv')) - if csv_files: - latest_csv = max(csv_files, key=os.path.getctime) - analyzer = PostAnalyzer(str(latest_csv)) - analyzer.run() - - # SEO Check - print("\n🔍 Step 3/3: Checking SEO quality...") - seo_analyzer = MultiSiteSEOAnalyzer() - seo_analyzer.run(use_ai=True, top_n=10) - - print("\n✅ Full pipeline completed!") - return 0 - - -def import_analytics(args, verbose=False, dry_run=False): - """Import analytics data""" - if dry_run: - print("Would import analytics data") - return 0 - - if len(args) < 2: - print("❌ Usage: seo analytics ") - return 1 - - ga_file = args[0] - gsc_file = args[1] - - # Find the latest exported posts CSV - output_dir = Path(__file__).parent / 'output' - posts_files = list(output_dir.glob('all_posts_*.csv')) - - if not posts_files: - print("❌ No exported posts found. 
Run 'seo export' first.") - return 1 - - latest_posts = max(posts_files, key=os.path.getctime) - - print(f"📊 Importing analytics data...") - print(f"GA4 file: {ga_file}") - print(f"GSC file: {gsc_file}") - print(f"Posts file: {latest_posts.name}") - - importer = AnalyticsImporter() - importer.run( - ga_csv=Path(ga_file), - gsc_csv=Path(gsc_file), - posts_csv=latest_posts, - output_csv=output_dir / 'posts_with_analytics.csv' - ) - - print("✅ Analytics import completed!") - return 0 - - -def analyze_content_gaps(args, verbose=False, dry_run=False): - """Analyze content gaps""" - if dry_run: - print("Would analyze content gaps") - return 0 - - print("🕳️ Analyzing content gaps...") - - # Find posts with analytics - output_dir = Path(__file__).parent / 'output' - posts_file = output_dir / 'results' / 'posts_with_analytics.csv' - - if not posts_file.exists(): - print("❌ Posts with analytics not found. Run 'seo analytics' first.") - return 1 - - # Find GSC queries - gsc_file = Path(__file__).parent / 'input' / 'analytics' / 'gsc' / 'Requêtes.csv' - if not gsc_file.exists(): - gsc_file = output_dir / 'gsc_queries.csv' # fallback - - if not gsc_file.exists(): - print("❌ GSC queries file not found. Expected at input/analytics/gsc/Requêtes.csv") - return 1 - - analyzer = ContentGapAnalyzer() - analyzer.run( - posts_csv=posts_file, - gsc_csv=gsc_file, - output_csv=output_dir / 'results' / 'content_gaps.csv' - ) - - print("✅ Content gap analysis completed!") - return 0 - - -def analyze_opportunities(args, verbose=False, dry_run=False): - """Analyze keyword opportunities""" - if dry_run: - print("Would analyze keyword opportunities") - return 0 - - print("🎯 Analyzing keyword opportunities...") - - # Find posts with analytics - output_dir = Path(__file__).parent / 'output' / 'results' - posts_file = output_dir / 'posts_with_analytics.csv' - - if not posts_file.exists(): - print("❌ Posts with analytics not found. 
Run 'seo analytics' first.") - return 1 - - analyzer = OpportunityAnalyzer() - analyzer.run( - posts_csv=posts_file, - output_csv=output_dir / 'keyword_opportunities.csv' - ) - - print("✅ Opportunity analysis completed!") - return 0 - - -def generate_report(args, verbose=False, dry_run=False): - """Generate SEO optimization report""" - if dry_run: - print("Would generate SEO optimization report") - return 0 - - print("📋 Generating SEO optimization report...") - - output_dir = Path(__file__).parent / 'output' / 'results' - posts_file = output_dir / 'posts_with_analytics.csv' - opportunities_file = output_dir / 'keyword_opportunities.csv' - gaps_file = output_dir / 'content_gaps.csv' - - if not posts_file.exists(): - print("❌ Posts with analytics not found. Run 'seo analytics' first.") - return 1 - - generator = ReportGenerator() - generator.run( - posts_csv=posts_file, - opportunities_csv=opportunities_file, - gaps_csv=gaps_file, - output_md=output_dir / 'seo_optimization_report.md', - output_prioritized_csv=output_dir / 'posts_prioritized.csv' - ) - - print("✅ Report generation completed!") - return 0 - - -def show_status(args, verbose=False, dry_run=False): - """Show output files status""" - if dry_run: - print("Would show output files status") - return 0 - - print("📊 Output files status:") - - output_dir = Path(__file__).parent / 'output' - if output_dir.exists(): - files = list(output_dir.glob('*.csv')) - if files: - print(f"\nFound {len(files)} CSV files in output/:") - for file in sorted(files, key=os.path.getctime, reverse=True)[:10]: # Show latest 10 - size = file.stat().st_size / 1024 # KB - mtime = file.stat().st_mtime - from datetime import datetime - date = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M') - print(f" {file.name} ({size:.1f}KB, {date})") - else: - print(" No CSV files found in output/") - else: - print(" output/ directory not found") - - return 0 +# Add src to path +src_dir = Path(__file__).parent / 'src' +sys.path.insert(0, 
str(src_dir)) +# Import and run CLI +from seo.cli import main if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..44946b0 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,37 @@ +[metadata] +name = seo-automation +version = 1.0.0 +description = WordPress SEO automation with AI-powered recommendations +long_description = file: README.md +long_description_content_type = text/markdown +license = MIT +author = SEO Automation Team +url = https://github.com/example/seo-automation +classifiers = + Development Status :: 4 - Beta + Intended Audience :: Developers + Topic :: Internet :: WWW/HTTP + License :: OSI Approved :: MIT License + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + +[options] +package_dir = + = src +packages = find: +python_requires = >=3.8 +install_requires = + requests>=2.31.0 + python-dotenv>=1.0.0 + PyYAML>=6.0 + +[options.packages.find] +where = src + +[options.entry_points] +console_scripts = + seo = seo.cli:main diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d031305 --- /dev/null +++ b/setup.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Setup script for SEO Automation Tool +""" + +from setuptools import setup, find_packages +from pathlib import Path + +# Read README for long description +readme_path = Path(__file__).parent / 'README.md' +long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else '' + +# Read requirements +requirements_path = Path(__file__).parent / 'requirements.txt' +requirements = [ + line.strip() + for line in requirements_path.read_text().splitlines() + if line.strip() and not line.startswith('#') +] if requirements_path.exists() else [] + +setup( + name='seo-automation', + 
version='1.0.0', + author='SEO Automation Team', + author_email='seo@example.com', + description='WordPress SEO automation with AI-powered recommendations', + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/example/seo-automation', + packages=find_packages(where='src'), + package_dir={'': 'src'}, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Internet :: WWW/HTTP', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + ], + python_requires='>=3.8', + install_requires=requirements, + entry_points={ + 'console_scripts': [ + 'seo=seo.cli:main', + ], + }, + include_package_data=True, + package_data={ + 'seo': ['py.typed'], + }, +) diff --git a/src/seo/__init__.py b/src/seo/__init__.py new file mode 100644 index 0000000..9278c85 --- /dev/null +++ b/src/seo/__init__.py @@ -0,0 +1,7 @@ +""" +SEO Automation Tool - Integrated Application +A comprehensive WordPress SEO automation suite. 
+""" + +__version__ = '1.0.0' +__author__ = 'SEO Automation Team' diff --git a/src/seo/analyzer.py b/src/seo/analyzer.py new file mode 100644 index 0000000..67bfc17 --- /dev/null +++ b/src/seo/analyzer.py @@ -0,0 +1,14 @@ +""" +Analyzer Module - AI-powered post analysis +""" + +import sys +from pathlib import Path + +# Import from scripts directory +scripts_dir = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_dir)) + +from ai_analyze_posts_for_decisions import PostAnalyzer + +__all__ = ['PostAnalyzer'] diff --git a/src/seo/app.py b/src/seo/app.py new file mode 100644 index 0000000..b1c636c --- /dev/null +++ b/src/seo/app.py @@ -0,0 +1,255 @@ +""" +SEO Application Core - Integrated SEO automation functionality +""" + +import logging +from pathlib import Path +from datetime import datetime +from typing import Optional, List + +from .exporter import PostExporter +from .analyzer import PostAnalyzer +from .recategorizer import PostRecategorizer +from .seo_checker import MultiSiteSEOAnalyzer +from .categories import CategoryManager +from .approval import UserApprovalSystem + +logger = logging.getLogger(__name__) + + +class SEOApp: + """ + Main SEO Application class. + + Provides a unified interface for all SEO automation tasks. + Inspired by Ruby on Rails' Active Record pattern. + + Usage: + app = SEOApp() + app.export() + app.analyze() + app.seo_check() + """ + + def __init__(self, verbose: bool = False): + """ + Initialize the SEO application. 
+ + Args: + verbose: Enable verbose logging + """ + self.verbose = verbose + self.output_dir = Path(__file__).parent.parent.parent / 'output' + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Initialize components + self.exporter = None + self.analyzer = None + self.recategorizer = None + self.seo_checker = None + self.category_manager = None + self.approval_system = None + + if verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + + def export(self) -> str: + """ + Export all posts from WordPress sites. + + Returns: + Path to exported CSV file + """ + logger.info("📦 Exporting all posts from WordPress sites...") + self.exporter = PostExporter() + self.exporter.run() + + # Get the exported file path + date_str = datetime.now().strftime('%Y-%m-%d') + csv_file = self.output_dir / f'all_posts_{date_str}.csv' + + logger.info(f"✅ Export completed: {csv_file}") + return str(csv_file) + + def analyze(self, csv_file: Optional[str] = None) -> str: + """ + Analyze posts with AI for recommendations. + + Args: + csv_file: Path to CSV file (uses latest export if not provided) + + Returns: + Path to analysis results + """ + logger.info("🤖 Analyzing posts with AI for recommendations...") + + # Find CSV file + if not csv_file: + csv_file = self._find_latest_export() + + if not csv_file: + raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.") + + logger.info(f"Using file: {csv_file}") + + # Run analysis + self.analyzer = PostAnalyzer(csv_file) + self.analyzer.run() + + logger.info("✅ AI analysis completed!") + return csv_file + + def recategorize(self, csv_file: Optional[str] = None) -> str: + """ + Recategorize posts with AI suggestions. 
+ + Args: + csv_file: Path to CSV file (uses latest export if not provided) + + Returns: + Path to recategorization results + """ + logger.info("🏷️ Recategorizing posts with AI suggestions...") + + # Find CSV file + if not csv_file: + csv_file = self._find_latest_export() + + if not csv_file: + raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.") + + logger.info(f"Using file: {csv_file}") + + # Run recategorization + self.recategorizer = PostRecategorizer(csv_file) + self.recategorizer.run() + + logger.info("✅ Recategorization completed!") + return csv_file + + def seo_check(self, top_n: int = 10) -> None: + """ + Check SEO quality of titles and descriptions. + + Args: + top_n: Number of top posts to get AI recommendations for + """ + logger.info("🔍 Checking SEO quality of titles/descriptions...") + + self.seo_checker = MultiSiteSEOAnalyzer() + self.seo_checker.run(use_ai=True, top_n=top_n) + + logger.info("✅ SEO check completed!") + + def categories(self) -> None: + """Manage categories across all sites.""" + logger.info("🗂️ Managing categories across all sites...") + + self.category_manager = CategoryManager() + self.category_manager.run() + + logger.info("✅ Category management completed!") + + def approve(self, files: Optional[List[str]] = None) -> None: + """ + Review and approve recommendations. + + Args: + files: List of CSV files to review (auto-detects if not provided) + """ + logger.info("✅ Reviewing and approving recommendations...") + + self.approval_system = UserApprovalSystem() + + if not files: + # Auto-detect recommendation files + files = self._find_recommendation_files() + + if not files: + raise FileNotFoundError("No recommendation files found. 
Run analyze() or categories() first.") + + logger.info(f"Found {len(files)} recommendation files to review") + self.approval_system.run_interactive_approval(files) + + logger.info("✅ Approval process completed!") + + def full_pipeline(self) -> None: + """ + Run complete workflow: export → analyze → seo_check + """ + logger.info("🚀 Running full SEO automation pipeline...") + + # Step 1: Export + logger.info("\n📦 Step 1/3: Exporting posts...") + self.export() + + # Step 2: Analyze + logger.info("\n🤖 Step 2/3: Analyzing with AI...") + self.analyze() + + # Step 3: SEO Check + logger.info("\n🔍 Step 3/3: Checking SEO quality...") + self.seo_check() + + logger.info("\n✅ Full pipeline completed!") + + def _find_latest_export(self) -> Optional[str]: + """ + Find the latest exported CSV file. + + Returns: + Path to latest CSV file or None if not found + """ + csv_files = list(self.output_dir.glob('all_posts_*.csv')) + + if not csv_files: + return None + + latest = max(csv_files, key=lambda f: f.stat().st_ctime) + return str(latest) + + def _find_recommendation_files(self) -> List[str]: + """ + Find recommendation files in output directory. + + Returns: + List of paths to recommendation files + """ + patterns = [ + 'category_assignments_*.csv', + 'posts_with_ai_recommendations_*.csv', + 'posts_to_move_*.csv', + 'posts_to_consolidate_*.csv', + 'posts_to_delete_*.csv' + ] + + files = [] + for pattern in patterns: + files.extend(self.output_dir.glob(pattern)) + + return [str(f) for f in files] + + def status(self) -> dict: + """ + Get status of output files. 
+ + Returns: + Dictionary with file information + """ + files = list(self.output_dir.glob('*.csv')) + + status_info = { + 'total_files': len(files), + 'files': [] + } + + for file in sorted(files, key=lambda f: f.stat().st_ctime, reverse=True)[:10]: + status_info['files'].append({ + 'name': file.name, + 'size_kb': file.stat().st_size / 1024, + 'modified': datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M') + }) + + return status_info diff --git a/src/seo/approval.py b/src/seo/approval.py new file mode 100644 index 0000000..512c41b --- /dev/null +++ b/src/seo/approval.py @@ -0,0 +1,14 @@ +""" +Approval System Module - User approval for recommendations +""" + +import sys +from pathlib import Path + +# Import from scripts directory +scripts_dir = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_dir)) + +from user_approval import UserApprovalSystem + +__all__ = ['UserApprovalSystem'] diff --git a/src/seo/categories.py b/src/seo/categories.py new file mode 100644 index 0000000..b4b6a6c --- /dev/null +++ b/src/seo/categories.py @@ -0,0 +1,14 @@ +""" +Category Manager Module - Category management across sites +""" + +import sys +from pathlib import Path + +# Import from scripts directory +scripts_dir = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_dir)) + +from category_manager import CategoryManager + +__all__ = ['CategoryManager'] diff --git a/src/seo/cli.py b/src/seo/cli.py new file mode 100644 index 0000000..04b269f --- /dev/null +++ b/src/seo/cli.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +""" +SEO Automation CLI - Main entry point +Unified command-line interface for SEO automation. 
+"""
+
+import sys
+import argparse
+from pathlib import Path
+
+# Add src to path
+src_dir = Path(__file__).parent.parent
+sys.path.insert(0, str(src_dir))
+
+from seo.app import SEOApp
+from seo.config import Config
+
+
+def main():
+    """Main CLI entry point."""
+    parser = argparse.ArgumentParser(
+        prog='seo',
+        description='SEO Automation CLI - Manage WordPress SEO with AI',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  seo export                 Export all posts from WordPress sites
+  seo analyze                Analyze posts with AI for recommendations
+  seo analyze posts.csv      Analyze specific CSV file
+  seo recategorize           Recategorize posts with AI
+  seo seo_check              Check SEO quality of titles/descriptions
+  seo categories             Manage categories across sites
+  seo approve                Review and approve recommendations
+  seo full_pipeline          Run complete workflow: export → analyze → seo_check
+  seo status                 Show output files status
+        """
+    )
+
+    parser.add_argument('command', nargs='?', help='Command to run')
+    parser.add_argument('args', nargs='*', help='Arguments for the command')
+    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
+    parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
+    parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')
+
+    args = parser.parse_args()
+
+    if not args.command:
+        parser.print_help()
+        return 0
+
+    # Validate configuration
+    try:
+        Config.validate()
+    except ValueError as e:
+        print(f"❌ Configuration error: {e}")
+        return 1
+
+    # Create application instance
+    app = SEOApp(verbose=args.verbose)
+
+    # Route to appropriate command
+    commands = {
+        'export': cmd_export,
+        'analyze': cmd_analyze,
+        'recategorize': cmd_recategorize,
+        'seo_check': cmd_seo_check,
+        'categories': cmd_categories,
+        'approve': cmd_approve,
+        'full_pipeline': cmd_full_pipeline,
+        'status': cmd_status,
+        'help': cmd_help,
+    }
+
+    if args.command not in commands:
+        
print(f"❌ Unknown command: {args.command}") + print("\nAvailable commands:") + for cmd in sorted(commands.keys()): + print(f" {cmd}") + return 1 + + try: + return commands[args.command](app, args) + except KeyboardInterrupt: + print("\n⚠️ Operation cancelled by user") + return 1 + except FileNotFoundError as e: + print(f"❌ File not found: {e}") + return 1 + except Exception as e: + print(f"❌ Error: {e}") + if args.verbose: + import traceback + traceback.print_exc() + return 1 + + +def cmd_export(app, args): + """Export all posts.""" + if args.dry_run: + print("Would export all posts from WordPress sites") + return 0 + app.export() + return 0 + + +def cmd_analyze(app, args): + """Analyze posts with AI.""" + if args.dry_run: + print("Would analyze posts with AI for recommendations") + return 0 + + csv_file = args.args[0] if args.args else None + app.analyze(csv_file) + return 0 + + +def cmd_recategorize(app, args): + """Recategorize posts with AI.""" + if args.dry_run: + print("Would recategorize posts with AI suggestions") + return 0 + + csv_file = args.args[0] if args.args else None + app.recategorize(csv_file) + return 0 + + +def cmd_seo_check(app, args): + """Check SEO quality.""" + if args.dry_run: + print("Would check SEO quality of titles/descriptions") + return 0 + + app.seo_check(top_n=args.top_n) + return 0 + + +def cmd_categories(app, args): + """Manage categories.""" + if args.dry_run: + print("Would manage categories across all sites") + return 0 + + app.categories() + return 0 + + +def cmd_approve(app, args): + """Approve recommendations.""" + if args.dry_run: + print("Would review and approve recommendations") + return 0 + + files = args.args if args.args else None + app.approve(files) + return 0 + + +def cmd_full_pipeline(app, args): + """Run full pipeline.""" + if args.dry_run: + print("Would run full pipeline: export → analyze → seo_check") + return 0 + + app.full_pipeline() + return 0 + + +def cmd_status(app, args): + """Show status.""" + if 
args.dry_run: + print("Would show output files status") + return 0 + + status = app.status() + + print("📊 Output files status:") + if status['total_files'] > 0: + print(f"\nFound {status['total_files']} CSV files in output/:") + for file in status['files']: + print(f" {file['name']} ({file['size_kb']:.1f}KB, {file['modified']})") + else: + print(" No CSV files found in output/") + + return 0 + + +def cmd_help(app, args): + """Show help.""" + print(""" +SEO Automation CLI - Available Commands + +Basic Commands: + export Export all posts from WordPress sites + analyze [csv_file] Analyze posts with AI (optional CSV input) + recategorize [csv_file] Recategorize posts with AI (optional CSV input) + seo_check Check SEO quality of titles/descriptions + categories Manage categories across all sites + approve [files...] Review and approve recommendations + full_pipeline Run complete workflow: export → analyze → seo_check + +Utility: + status Show output files status + help Show this help message + +Options: + --verbose, -v Enable verbose logging + --dry-run Show what would be done without doing it + --top-n N Number of top posts for AI analysis (default: 10) + +Examples: + seo export + seo analyze + seo analyze output/all_posts_2026-02-16.csv + seo approve output/category_assignments_*.csv + seo full_pipeline + seo status + """) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/seo/config.py b/src/seo/config.py new file mode 100644 index 0000000..e438536 --- /dev/null +++ b/src/seo/config.py @@ -0,0 +1,70 @@ +""" +Configuration module for SEO application. +Loads configuration from environment variables and YAML. 
"""
Configuration module for SEO application.
Loads configuration from environment variables and YAML.
"""

import os
import yaml
from dotenv import load_dotenv
from pathlib import Path

# Load variables from a .env file into the environment (no-op if absent).
load_dotenv()


class Config:
    """Configuration class for SEO automation.

    All settings are resolved once, at import time: environment variables
    take precedence, then config.yaml, then hard-coded defaults.
    """

    # config.yaml lives at the repository root (see README layout), which is
    # three levels above this file: src/seo/config.py -> src/seo -> src -> root.
    # The exporter resolves its output/ directory with the same three hops.
    # BUG FIX: the previous `parent.parent` pointed at src/config.yaml, which
    # never exists, so the YAML configuration was silently ignored.
    CONFIG_FILE = Path(__file__).parent.parent.parent / 'config.yaml'

    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            # `or {}`: safe_load returns None for an empty file, which would
            # break the .get() chains below.
            YAML_CONFIG = yaml.safe_load(f) or {}
    else:
        YAML_CONFIG = {}

    # WordPress Settings (primary site)
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))

    # Multi-site Configuration: per-site env vars override the shared
    # WORDPRESS_USERNAME / WORDPRESS_APP_PASSWORD credentials.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        }
    }

    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))

    @classmethod
    def validate(cls):
        """Validate configuration.

        Returns:
            True when every required setting is present.

        Raises:
            ValueError: with a message listing *all* missing settings, so the
                user can fix them in one pass.
        """
        errors = []

        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required")

        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f" - {e}" for e in errors))

        return True
response.raise_for_status() + + for cat in response.json(): + categories[cat['id']] = {'name': cat.get('name', ''), 'slug': cat.get('slug', '')} + logger.info(f" ✓ Fetched {len(categories)} categories") + except Exception as e: + logger.warning(f" Could not fetch categories from {site_name}: {e}") + + self.category_cache[site_name] = categories + return categories + + def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]: + """Fetch all posts from a WordPress site.""" + logger.info(f"\nFetching posts from {site_name}...") + + posts = [] + base_url = site_config['url'].rstrip('/') + api_url = f"{base_url}/wp-json/wp/v2/posts" + auth = HTTPBasicAuth(site_config['username'], site_config['password']) + + for status in ['publish', 'draft']: + page = 1 + status_count = 0 + + while True: + try: + logger.info(f" Fetching page {page} ({status} posts)...") + response = requests.get( + api_url, + params={'page': page, 'per_page': 100, 'status': status}, + auth=auth, + timeout=10 + ) + response.raise_for_status() + + page_posts = response.json() + if not page_posts: + break + + posts.extend(page_posts) + status_count += len(page_posts) + logger.info(f" ✓ Got {len(page_posts)} posts (total: {len(posts)})") + + page += 1 + time.sleep(0.5) + + except requests.exceptions.HTTPError as e: + if response.status_code == 400: + logger.info(f" ℹ API limit reached (got {status_count} {status} posts)") + break + else: + logger.error(f"Error on page {page}: {e}") + break + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching from {site_name}: {e}") + break + + logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n") + return posts + + def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict: + """Extract post details for CSV export.""" + title = post.get('title', {}) + if isinstance(title, dict): + title = title.get('rendered', '') + + content = post.get('content', {}) + if isinstance(content, dict): 
+ content = content.get('rendered', '') + content_text = re.sub('<[^<]+?>', '', content)[:500] + + excerpt = post.get('excerpt', {}) + if isinstance(excerpt, dict): + excerpt = excerpt.get('rendered', '') + excerpt_text = re.sub('<[^<]+?>', '', excerpt) + + meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {} + meta_description = ( + meta_dict.get('rank_math_description', '') or + meta_dict.get('_yoast_wpseo_metadesc', '') or '' + ) + + category_ids = post.get('categories', []) + category_names = ', '.join([ + category_map.get(cat_id, {}).get('name', str(cat_id)) + for cat_id in category_ids + ]) if category_ids else '' + + return { + 'site': site_name, + 'post_id': post['id'], + 'status': post.get('status', 'publish'), + 'title': title.strip(), + 'slug': post.get('slug', ''), + 'url': post.get('link', ''), + 'author_id': post.get('author', ''), + 'date_published': post.get('date', ''), + 'date_modified': post.get('modified', ''), + 'categories': category_names, + 'tags': ', '.join([str(t) for t in post.get('tags', [])]), + 'excerpt': excerpt_text.strip(), + 'content_preview': content_text.strip(), + 'seo_title': meta_dict.get('rank_math_title', ''), + 'meta_description': meta_description, + 'focus_keyword': meta_dict.get('rank_math_focus_keyword', ''), + 'word_count': len(content_text.split()), + } + + def export_to_csv(self, output_file: Optional[str] = None) -> str: + """Export all posts to CSV.""" + if not output_file: + output_dir = Path(__file__).parent.parent.parent / 'output' + output_dir.mkdir(parents=True, exist_ok=True) + date_str = datetime.now().strftime('%Y-%m-%d') + output_file = output_dir / f'all_posts_{date_str}.csv' + + output_file = Path(output_file) + output_file.parent.mkdir(parents=True, exist_ok=True) + + if not self.all_posts: + logger.error("No posts to export") + return None + + fieldnames = [ + 'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id', + 'date_published', 'date_modified', 'categories', 
'tags', 'excerpt', + 'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count', + ] + + logger.info(f"Exporting {len(self.all_posts)} posts to CSV...") + + with open(output_file, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(self.all_posts) + + logger.info(f"✓ CSV exported to: {output_file}") + return str(output_file) + + def run(self): + """Run the complete export process.""" + logger.info("="*70) + logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING") + logger.info("="*70) + logger.info("Sites configured: " + ", ".join(self.sites.keys())) + + for site_name, config in self.sites.items(): + categories = self.fetch_category_names(site_name, config) + posts = self.fetch_posts_from_site(site_name, config) + + if posts: + for post in posts: + post_details = self.extract_post_details(post, site_name, categories) + self.all_posts.append(post_details) + + if not self.all_posts: + logger.error("No posts found on any site") + return + + self.all_posts.sort(key=lambda x: (x['site'], x['post_id'])) + self.export_to_csv() + + # Print summary + logger.info("\n" + "="*70) + logger.info("EXPORT SUMMARY") + logger.info("="*70) + + by_site = {} + for post in self.all_posts: + site = post['site'] + if site not in by_site: + by_site[site] = {'total': 0, 'published': 0, 'draft': 0} + by_site[site]['total'] += 1 + if post['status'] == 'publish': + by_site[site]['published'] += 1 + else: + by_site[site]['draft'] += 1 + + for site, stats in sorted(by_site.items()): + logger.info(f"\n{site}:") + logger.info(f" Total: {stats['total']}") + logger.info(f" Published: {stats['published']}") + logger.info(f" Drafts: {stats['draft']}") + + logger.info(f"\n✓ Export complete!") diff --git a/src/seo/recategorizer.py b/src/seo/recategorizer.py new file mode 100644 index 0000000..628f098 --- /dev/null +++ b/src/seo/recategorizer.py @@ -0,0 +1,14 @@ +""" +Recategorizer Module - 
AI-powered post recategorization +""" + +import sys +from pathlib import Path + +# Import from scripts directory +scripts_dir = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_dir)) + +from ai_recategorize_posts import PostRecategorizer + +__all__ = ['PostRecategorizer'] diff --git a/src/seo/seo_checker.py b/src/seo/seo_checker.py new file mode 100644 index 0000000..e4993c1 --- /dev/null +++ b/src/seo/seo_checker.py @@ -0,0 +1,14 @@ +""" +SEO Checker Module - SEO quality analysis +""" + +import sys +from pathlib import Path + +# Import from scripts directory +scripts_dir = Path(__file__).parent.parent.parent / 'scripts' +sys.path.insert(0, str(scripts_dir)) + +from multi_site_seo_analyzer import MultiSiteSEOAnalyzer + +__all__ = ['MultiSiteSEOAnalyzer']