import ast
import base64
import io
import json
import re
import unicodedata

import pandas as pd
from flask import current_app
from gradio_client import Client
from PIL import Image


class ContentService:
    """Service for AI content generation using Hugging Face models.

    The service talks to the ``Zelyanoth/Linkedin_poster_dev`` Space through
    ``gradio_client`` and post-processes whatever the Space returns into a
    ``(text, image)`` pair.
    """

    # Text returned when the model yields no usable content.
    _PLACEHOLDER_CONTENT = "Generated content will appear here..."

    # Prefix identifying an inline image encoded as a data URL.
    _DATA_URL_PREFIX = 'data:image/'

    # Delimiter placed between the RSS link and the user id in the single
    # string sent to the "/ajouter_rss" endpoint. It must stay byte-identical.
    # NOTE(review): presumably the Space splits its input on this marker;
    # confirm against the Space's code before changing it.
    _RSS_USER_SEPARATOR = "__thi_irrh'èçs_my_id__! "

    def __init__(self, hugging_key=None):
        """Create the Gradio client.

        Args:
            hugging_key (str | None): Hugging Face token. When falsy, the
                ``HUGGING_KEY`` entry of the current Flask app config is used.
        """
        self.hugging_key = hugging_key or current_app.config.get('HUGGING_KEY')
        self.client = Client(
            "Zelyanoth/Linkedin_poster_dev", hf_token=self.hugging_key
        )

    def validate_unicode_content(self, content):
        """Return ``content`` in a form that is guaranteed to encode as UTF-8.

        Whitespace and formatting are never touched. Falsy values and
        non-string values are returned unchanged.

        Args:
            content: Text to validate.

        Returns:
            The original string when it encodes cleanly; otherwise a copy in
            which un-encodable code points (e.g. lone surrogates) are replaced
            by ``?``.
        """
        if not content or not isinstance(content, str):
            return content
        try:
            content.encode('utf-8')
        except UnicodeEncodeError:
            # errors='replace' cannot fail, so no further fallback is needed.
            return content.encode('utf-8', errors='replace').decode('utf-8')
        return content

    def preserve_formatting(self, content):
        """Return ``content`` with spaces, tabs and line breaks left intact.

        The only transformation applied is the UTF-8 safety pass performed by
        ``validate_unicode_content``; falsy and non-string values are returned
        unchanged.

        Args:
            content: Text to pass through.

        Returns:
            The UTF-8-safe text (or the input itself when it is not a string).
        """
        return self.validate_unicode_content(content)

    def sanitize_content_for_api(self, content):
        """Make text safe to send to, or accept from, the remote API.

        Args:
            content: Text to sanitize. Falsy and non-string values are
                returned unchanged.

        Returns:
            The text with un-encodable code points replaced, NUL bytes
            removed, and ``\\r\\n`` / ``\\r`` line endings normalized to
            ``\\n``. All other characters are preserved.
        """
        if not content or not isinstance(content, str):
            return content
        text = self.validate_unicode_content(content)
        # NUL bytes are the only characters dropped.
        text = text.replace('\x00', '')
        return text.replace('\r\n', '\n').replace('\r', '\n')

    def _is_base64_image(self, data):
        """Tell whether ``data`` is an inline base64 image rather than a URL.

        Args:
            data: Candidate value (any type).

        Returns:
            bool: True for an ``data:image/...`` data URL or for a non-empty
            string that is, in its entirety, valid base64. False otherwise.
        """
        if not isinstance(data, str) or not data:
            return False
        if data.startswith(self._DATA_URL_PREFIX):
            return True
        try:
            # Validate the whole string, exactly as _base64_to_bytes decodes
            # it, so a True answer here can never fail to decode there.
            base64.b64decode(data, validate=True)
        except ValueError:  # binascii.Error is a ValueError subclass
            return False
        return True

    def _base64_to_bytes(self, base64_string):
        """Decode a base64 string or ``data:image/...`` data URL to bytes.

        Args:
            base64_string (str): Raw base64 text or an image data URL.

        Returns:
            bytes: The decoded payload.

        Raises:
            Exception: If the input cannot be decoded; the failure is logged
                on the Flask app logger and the original error is chained.
        """
        try:
            if base64_string.startswith(self._DATA_URL_PREFIX):
                _, comma, payload = base64_string.partition(',')
                if not comma:
                    raise ValueError("data URL has no base64 payload")
            else:
                payload = base64_string
            return base64.b64decode(payload, validate=True)
        except Exception as e:
            current_app.logger.error(f"Failed to decode base64 image: {str(e)}")
            raise Exception(f"Failed to decode base64 image: {str(e)}") from e

    @staticmethod
    def _parse_model_result(result):
        """Turn the raw model output into a Python object.

        Values that are not text (for example a list or tuple) are returned
        as-is. Text is tried as JSON first, then as a Python literal; when
        both fail it is wrapped in a one-element list.
        """
        if not isinstance(result, (str, bytes, bytearray)):
            return result
        try:
            return json.loads(result)
        except ValueError:  # JSONDecodeError / UnicodeDecodeError
            pass
        try:
            # literal_eval only builds literals; it never executes code.
            return ast.literal_eval(result)
        except (ValueError, TypeError, SyntaxError, MemoryError,
                RecursionError):
            return [result]

    def generate_post_content(self, user_id: str) -> tuple:
        """
        Generate post content using AI.

        Args:
            user_id (str): User ID for personalization

        Returns:
            tuple: ``(content, image)`` where ``content`` is the sanitized
            post text and ``image`` is ``None``, decoded image ``bytes`` (when
            the model returned base64 data), or the value returned by the
            model unchanged (e.g. a URL string).

        Raises:
            Exception: If the remote call or post-processing fails; the
                failure is logged and the original error is chained.
        """
        try:
            result = self.client.predict(
                code=user_id,
                api_name="/poster_linkedin"
            )
            parsed = self._parse_model_result(result)

            if isinstance(parsed, (list, tuple)):
                # First element is the text, optional second is the image.
                has_text = bool(parsed) and parsed[0] is not None
                generated_content = (
                    parsed[0] if has_text else self._PLACEHOLDER_CONTENT
                )
                image_data = parsed[1] if len(parsed) > 1 else None
            else:
                generated_content = (
                    str(parsed) if parsed is not None
                    else self._PLACEHOLDER_CONTENT
                )
                image_data = None

            # The sanitizer works on text only.
            if not isinstance(generated_content, str):
                generated_content = str(generated_content)
            final_content = self.sanitize_content_for_api(generated_content)

            image = None
            if image_data:
                if self._is_base64_image(image_data):
                    image = self._base64_to_bytes(image_data)
                else:
                    # Not inline data: hand it back untouched.
                    image = image_data

            return (final_content, image)

        except Exception as e:
            error_message = str(e)
            current_app.logger.error(f"Content generation failed: {error_message}")
            raise Exception(f"Content generation failed: {error_message}") from e

    def add_rss_source(self, rss_link: str, user_id: str) -> str:
        """
        Add an RSS source for content generation.

        Args:
            rss_link (str): RSS feed URL
            user_id (str): User ID

        Returns:
            str: Result message returned by the model, sanitized. A ``None``
            result is returned unchanged.

        Raises:
            Exception: If the remote call fails; the original error is
                chained.
        """
        try:
            rss_input = f"{rss_link}{self._RSS_USER_SEPARATOR}{user_id}"
            result = self.client.predict(
                rss_link=self.sanitize_content_for_api(rss_input),
                api_name="/ajouter_rss"
            )
            if result is not None and not isinstance(result, str):
                result = str(result)
            return self.sanitize_content_for_api(result)
        except Exception as e:
            raise Exception(f"Failed to add RSS source: {str(e)}") from e