Coool2 committed on
Commit
252ce8c
·
verified ·
1 Parent(s): 38f293d

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +19 -15
agent.py CHANGED
@@ -114,15 +114,8 @@ Settings.callback_manager = callback_manager
114
  def read_and_parse_content(input_path: str) -> List[Document]:
115
  """
116
  Reads and parses content from a file path or URL into Document objects.
117
- It automatically detects the input type and uses the appropriate LlamaIndex reader.
118
-
119
- Args:
120
- input_path: A local file path or a web URL.
121
-
122
- Returns:
123
- A list of LlamaIndex Document objects with the extracted text.
124
  """
125
- # --- Completed readers map for various local file types ---
126
  readers_map = {
127
  # Documents
128
  '.pdf': PDFReader(),
@@ -132,11 +125,8 @@ def read_and_parse_content(input_path: str) -> List[Document]:
132
  '.csv': CSVReader(),
133
  '.json': JSONReader(),
134
  '.xlsx': PandasExcelReader(),
135
- # Media files
136
- '.jpg': ImageReader(),
137
- '.jpeg': ImageReader(),
138
- '.png': ImageReader(),
139
- '.mp3': AssemblyAIAudioTranscriptReader(input_path),
140
  }
141
 
142
  # --- URL Handling ---
@@ -155,11 +145,25 @@ def read_and_parse_content(input_path: str) -> List[Document]:
155
 
156
  file_extension = os.path.splitext(input_path)[1].lower()
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  if file_extension in readers_map:
159
  loader = readers_map[file_extension]
160
  documents = loader.load_data(file=input_path)
161
  else:
162
- # Fallback for text-based files without a specific reader (e.g., .py, .txt, .md)
163
  try:
164
  with open(input_path, 'r', encoding='utf-8') as f:
165
  content = f.read()
@@ -167,7 +171,7 @@ def read_and_parse_content(input_path: str) -> List[Document]:
167
  except Exception as e:
168
  return [Document(text=f"Error reading file as plain text: {e}")]
169
 
170
- # Add the source path to metadata for traceability
171
  for doc in documents:
172
  doc.metadata["source"] = input_path
173
 
 
114
  def read_and_parse_content(input_path: str) -> List[Document]:
115
  """
116
  Reads and parses content from a file path or URL into Document objects.
 
 
 
 
 
 
 
117
  """
118
+ # --- Readers map sans initialisation prématurée ---
119
  readers_map = {
120
  # Documents
121
  '.pdf': PDFReader(),
 
125
  '.csv': CSVReader(),
126
  '.json': JSONReader(),
127
  '.xlsx': PandasExcelReader(),
128
+ # Audio files - traitement spécial
129
+ # '.mp3': sera géré séparément
 
 
 
130
  }
131
 
132
  # --- URL Handling ---
 
145
 
146
  file_extension = os.path.splitext(input_path)[1].lower()
147
 
148
+ if file_extension in ['.mp3', '.mp4', '.wav', '.m4a', '.flac']:
149
+ try:
150
+ loader = AssemblyAIAudioTranscriptReader(file_path=input_path)
151
+ documents = loader.load_data()
152
+ return documents
153
+ except Exception as e:
154
+ return [Document(text=f"Error transcribing audio: {e}")]
155
+
156
+ if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
157
+ return [Document(
158
+ text=f"IMAGE_PATH:{input_path}",
159
+ metadata={"source": input_path, "type": "image", "path": input_path}
160
+ )]
161
+
162
  if file_extension in readers_map:
163
  loader = readers_map[file_extension]
164
  documents = loader.load_data(file=input_path)
165
  else:
166
+ # Fallback pour les fichiers texte
167
  try:
168
  with open(input_path, 'r', encoding='utf-8') as f:
169
  content = f.read()
 
171
  except Exception as e:
172
  return [Document(text=f"Error reading file as plain text: {e}")]
173
 
174
+ # Ajouter les métadonnées de source
175
  for doc in documents:
176
  doc.metadata["source"] = input_path
177