rohit committed on
Commit
cfa1426
·
1 Parent(s): 1d87783

Create self-contained app.py with dynamic imports to fix module loading issues

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. main.py → app.py +13 -26
Dockerfile CHANGED
@@ -21,4 +21,4 @@ RUN chmod +x start.sh
21
  EXPOSE 7860
22
 
23
  # Run the FastAPI application
24
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
21
  EXPOSE 7860
22
 
23
  # Run the FastAPI application
24
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
main.py → app.py RENAMED
@@ -3,16 +3,6 @@ from pydantic import BaseModel
3
  import os
4
  import logging
5
  import sys
6
- from app.config import DATASET_CONFIGS
7
- # Lazy imports to avoid blocking startup
8
- # from .pipeline import RAGPipeline # Will import when needed
9
- # import umap # Will import when needed for visualization
10
- # import plotly.express as px # Will import when needed for visualization
11
- # import plotly.graph_objects as go # Will import when needed for visualization
12
- # from plotly.subplots import make_subplots # Will import when needed for visualization
13
- # import numpy as np # Will import when needed for visualization
14
- # from sklearn.preprocessing import normalize # Will import when needed for visualization
15
- # import pandas as pd # Will import when needed for visualization
16
  import json
17
 
18
  # Configure logging
@@ -32,20 +22,14 @@ pipelines = {}
32
  google_api_key = os.getenv("GOOGLE_API_KEY")
33
 
34
  logger.info(f"Starting RAG Pipeline API")
35
- logger.info(f"Port from env: {os.getenv('PORT', 'Not set - will use 8000')}")
36
  logger.info(f"Google API Key present: {'Yes' if google_api_key else 'No'}")
37
- logger.info(f"Available datasets: {list(DATASET_CONFIGS.keys())}")
38
 
39
  # Don't load datasets during startup - do it asynchronously after server starts
40
  logger.info("RAG Pipeline API is ready to serve requests - datasets will load in background")
41
 
42
- # Visualization function disabled to speed up startup
43
- # def create_3d_visualization(pipeline):
44
- # ... (commented out for faster startup)
45
-
46
  class Question(BaseModel):
47
  text: str
48
- dataset: str = "developer-portfolio" # Default dataset
49
 
50
  @app.post("/answer")
51
  async def get_answer(question: Question):
@@ -77,11 +61,15 @@ async def load_datasets_background():
77
  """Load datasets in background after server starts"""
78
  global pipelines
79
  if google_api_key:
80
- # Import RAGPipeline only when needed
81
- from app.pipeline import RAGPipeline
82
- # Only load developer-portfolio to save memory
83
- dataset_name = "developer-portfolio"
84
  try:
 
 
 
 
 
 
 
 
85
  logger.info(f"Loading dataset: {dataset_name}")
86
  pipeline = RAGPipeline.from_preset(
87
  google_api_key=google_api_key,
@@ -90,7 +78,7 @@ async def load_datasets_background():
90
  pipelines[dataset_name] = pipeline
91
  logger.info(f"Successfully loaded {dataset_name}")
92
  except Exception as e:
93
- logger.error(f"Failed to load {dataset_name}: {e}")
94
  logger.info(f"Background loading complete - {len(pipelines)} datasets loaded")
95
  else:
96
  logger.warning("No Google API key provided - running in demo mode without datasets")
@@ -98,7 +86,7 @@ async def load_datasets_background():
98
  @app.on_event("startup")
99
  async def startup_event():
100
  logger.info("FastAPI application startup complete")
101
- logger.info(f"Server should be running on port: {os.getenv('PORT', '8000')}")
102
 
103
  # Start loading datasets in background (non-blocking)
104
  import asyncio
@@ -123,6 +111,5 @@ async def health_check():
123
  "datasets_loaded": len(pipelines),
124
  "total_datasets": 1, # Only loading developer-portfolio
125
  "loading_status": loading_status,
126
- "port": os.getenv('PORT', '8000')
127
- }
128
-
 
3
  import os
4
  import logging
5
  import sys
 
 
 
 
 
 
 
 
 
 
6
  import json
7
 
8
  # Configure logging
 
22
  google_api_key = os.getenv("GOOGLE_API_KEY")
23
 
24
  logger.info(f"Starting RAG Pipeline API")
 
25
  logger.info(f"Google API Key present: {'Yes' if google_api_key else 'No'}")
 
26
 
27
  # Don't load datasets during startup - do it asynchronously after server starts
28
  logger.info("RAG Pipeline API is ready to serve requests - datasets will load in background")
29
 
 
 
 
 
30
  class Question(BaseModel):
31
  text: str
32
+ dataset: str = "developer-portfolio"
33
 
34
  @app.post("/answer")
35
  async def get_answer(question: Question):
 
61
  """Load datasets in background after server starts"""
62
  global pipelines
63
  if google_api_key:
 
 
 
 
64
  try:
65
+ # Import modules only when needed
66
+ import sys
67
+ sys.path.append('/app')
68
+ from app.pipeline import RAGPipeline
69
+ from app.config import DATASET_CONFIGS
70
+
71
+ # Only load developer-portfolio to save memory
72
+ dataset_name = "developer-portfolio"
73
  logger.info(f"Loading dataset: {dataset_name}")
74
  pipeline = RAGPipeline.from_preset(
75
  google_api_key=google_api_key,
 
78
  pipelines[dataset_name] = pipeline
79
  logger.info(f"Successfully loaded {dataset_name}")
80
  except Exception as e:
81
+ logger.error(f"Failed to load dataset: {e}")
82
  logger.info(f"Background loading complete - {len(pipelines)} datasets loaded")
83
  else:
84
  logger.warning("No Google API key provided - running in demo mode without datasets")
 
86
  @app.on_event("startup")
87
  async def startup_event():
88
  logger.info("FastAPI application startup complete")
89
+ logger.info(f"Server should be running on port: 7860")
90
 
91
  # Start loading datasets in background (non-blocking)
92
  import asyncio
 
111
  "datasets_loaded": len(pipelines),
112
  "total_datasets": 1, # Only loading developer-portfolio
113
  "loading_status": loading_status,
114
+ "port": "7860"
115
+ }