# Spaces: Sleeping — HuggingFace Space status header captured by the page scrape.
import json
import os
from datetime import date

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI

from test_web_rag import get_docs_from_web

# Load .env so OPENAI_API_KEY (read implicitly by the OpenAI client) and
# YOU_API_KEY are available.
load_dotenv()

client = OpenAI()
# API key for the You.com search index (api.ydc-index.io).
you_key = os.getenv("YOU_API_KEY")
def get_ai_snippets_for_query(query):
    """Search the You.com index for *query* and return the raw 'hits' list.

    The query is sent only via ``params`` so that ``requests`` URL-encodes
    it; the previous code also interpolated the raw query into the URL,
    which duplicated the parameter and skipped encoding entirely.

    Returns ``None`` when the response JSON has no 'hits' key.
    """
    headers = {"X-API-Key": you_key}
    params = {"query": query}
    response = requests.get(
        "https://api.ydc-index.io/search",
        params=params,
        headers=headers,
    )
    return response.json().get('hits')
def get_web_search_you(query):
    """Render the You.com search hits for *query* as one markdown string.

    Each hit becomes a ``key: value`` line, except the 'snippets' key whose
    list items are expanded to markdown bullets; hits are separated by a
    blank line.
    """
    hits = get_ai_snippets_for_query(query)
    parts = []
    for hit in hits:
        for field, content in hit.items():
            if field == 'snippets':
                parts.append(f"{field}:\n")
                parts.extend(f"- {snippet}\n" for snippet in content)
            else:
                parts.append(f"{field}: {content}\n")
        parts.append("\n")
    return "".join(parts)
def predict(message, history, _n_web_search, _strategy):
    """Stream a web-search-augmented chat reply for gradio's ChatInterface.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) message pairs supplied by ChatInterface.
    _n_web_search : int
        Number of web searches to run (slider value).
    _strategy : str
        Search analysis depth, "Deep" or "Normal" (radio value).

    Yields
    ------
    str
        The progressively accumulated reply text (gradio streaming style).
    """
    # Log the history as JSON Lines; a bare json.dump in append mode would
    # concatenate objects into a file no JSON parser can read back.
    with open('history.json', mode='a', encoding='utf-8') as f:
        json.dump(history, f)
        f.write('\n')

    # get_docs_from_web yields progress strings, then a dict carrying the
    # scraped context under 'data'.  Stream progress to the UI as it arrives
    # and keep the context for the prompt.
    docs = get_docs_from_web(message, history[-1:], _n_web_search, _strategy)
    partial_message = ''
    information = ''
    for doc in docs:
        if isinstance(doc, dict):
            # Fall back to '' so a missing/None 'data' value doesn't inject
            # the literal text "None" into the prompt below.
            information = doc.get('data') or ''
        else:
            partial_message = partial_message + doc
            yield partial_message

    system_prompt = """
You are an advanced chatbot.
Today's date - {date}
When answering a question, adhere to the following revised rules:
- The "Information for reference" data is provided in the chunks with each chunk having its own source as url.
- Generate human-like text in response to input, reflecting your status as a sophisticated language model.
- Abstain from offering any health or medical advice and ensure all responses maintain this guideline strictly.
- Format all responses in markdown format consistently throughout interactions.
- Must cite sources from the information at the conclusion of your response using properly titled references, but only if the information you provided comes from sources that can be cited.
Information for reference:
"{context}"
Your answer should be structured in markdown as follows:
<Answer>
**Sources**:
Include this section only if the provided information contains sources. If sources are included, list them as follows:
- [Title of Source 1](URL to Source 1)
- [Title of Source 2](URL to Source 2)
... as needed. If no sources are provided, do not include this section in answer.
""".format(context=information, date=date.today().strftime('%B %d, %Y'))

    # Convert gradio's (user, assistant) pair history to the OpenAI
    # chat-completions message format.
    history_openai_format = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='gpt-4-turbo',
        messages=history_openai_format,
        temperature=0.5,
        max_tokens=1000,
        top_p=0.5,
        stream=True,
    )

    # Blank line separates the streamed progress log from the model's answer.
    partial_message += '\n\n'
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message
# UI controls exposed as additional inputs on the chat interface.
n_web_search = gr.Slider(
    1,
    10,
    value=3,
    step=1,
    label="Web searches",
    info="Choose between 1 and 10 number of web searches to do. Remember more the web searches more it will take time to reply.",
)
strategy = gr.Radio(
    ["Deep", "Normal"],
    label="Strategy",
    value="Normal",
    info="Select web search analysis type. Please keep in mind that deep analysis will take more time than normal analysis.",
)

# Wire the streaming predict() generator into a chat UI and serve it.
app = gr.ChatInterface(predict, additional_inputs=[n_web_search, strategy])
app.queue(default_concurrency_limit=5)
app.launch(debug=True, share=False)