morethanair committed on
Commit
b59f879
·
1 Parent(s): 35d2b81

Add HF hub fallback and dependency

Browse files
Files changed (2) hide show
  1. app.py +15 -5
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,12 +1,21 @@
1
  import streamlit as st
2
  import os
3
  from pinecone import Pinecone
4
- from sentence_transformers import SentenceTransformer
5
  from typing import List, Dict
 
 
 
 
 
 
 
 
 
 
 
6
  import re # For parsing timestamp and extracting video ID
7
  import streamlit.components.v1 as components # For embedding HTML
8
  from openai import OpenAI # Import OpenAI library
9
- import logging
10
 
11
  # Setup logging
12
  logging.basicConfig(level=logging.INFO)
@@ -84,7 +93,8 @@ def generate_youtube_embed_html(youtube_url: str, timestamp: str) -> str | None:
84
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY","pcsk_PZHLK_TRAvMCyNmJM4FKGCX7rbbY22a58fhnWYasx1mf3WL6sRasoASZXfsbnJYvCQ13w") # Load from environment variable
85
  PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")
86
  INDEX_NAME = "video-embeddings"
87
- EMBEDDING_MODEL = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
 
88
 
89
  # OpenAI 설정
90
  OPENAI_API_KEY = "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"
@@ -109,8 +119,8 @@ def init_pinecone():
109
  def load_embedding_model():
110
  """Sentence Transformer ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
111
  try:
112
- model = SentenceTransformer(EMBEDDING_MODEL)
113
- logger.info(f"Successfully loaded embedding model: {EMBEDDING_MODEL}")
114
  return model
115
  except Exception as e:
116
  st.error(f"임베딩 모델 로딩 중 오류 발생: {e}")
 
1
  import streamlit as st
2
  import os
3
  from pinecone import Pinecone
 
4
  from typing import List, Dict
5
+ import logging
6
+
7
+ # Compatibility shim for older huggingface_hub versions
8
+ try:
9
+ from huggingface_hub import cached_download
10
+ except Exception:
11
+ from huggingface_hub import hf_hub_download as cached_download
12
+ import huggingface_hub
13
+ huggingface_hub.cached_download = hf_hub_download
14
+
15
+ from sentence_transformers import SentenceTransformer
16
  import re # For parsing timestamp and extracting video ID
17
  import streamlit.components.v1 as components # For embedding HTML
18
  from openai import OpenAI # Import OpenAI library
 
19
 
20
  # Setup logging
21
  logging.basicConfig(level=logging.INFO)
 
93
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY","pcsk_PZHLK_TRAvMCyNmJM4FKGCX7rbbY22a58fhnWYasx1mf3WL6sRasoASZXfsbnJYvCQ13w") # Load from environment variable
94
  PINECONE_ENV = os.getenv("PINECONE_ENV", "us-east-1")
95
  INDEX_NAME = "video-embeddings"
96
+ # Use a lightweight multilingual model that works well with Korean
97
+ EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-small"
98
 
99
  # OpenAI 설정
100
  OPENAI_API_KEY = "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"
 
119
  def load_embedding_model():
120
  """Sentence Transformer ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
121
  try:
122
+ model = SentenceTransformer(EMBEDDING_MODEL_NAME)
123
+ logger.info(f"Successfully loaded embedding model: {EMBEDDING_MODEL_NAME}")
124
  return model
125
  except Exception as e:
126
  st.error(f"임베딩 모델 로딩 중 오류 발생: {e}")
requirements.txt CHANGED
@@ -2,6 +2,7 @@ streamlit
2
  pinecone
3
  numpy<2.0
4
  pandas
5
- openai
 
6
  sentence-transformers==2.2.2
7
  torch==2.2.0
 
2
  pinecone
3
  numpy<2.0
4
  pandas
5
+ openai
6
+ huggingface-hub
7
  sentence-transformers==2.2.2
8
  torch==2.2.0