Space status: Runtime error

Siddhant committed "initial commit"
Files changed:
- .gitignore (+129 −0)
- app.py (+109 −0)
- ingest_data.py (+46 −0)
- query_data.py (+30 −0)
- requirements.txt (+6 −0)
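Taken together: ingest_data.py loads posts.csv, splits it into 4000-character chunks, embeds the chunks with OpenAI, and upserts them into the Pinecone index "twimbit-answer"; query_data.py builds a ConversationalRetrievalChain over that index; and app.py serves the chain through a password-protected Gradio chat UI.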
.gitignore
ADDED
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
app.py
ADDED
@@ -0,0 +1,109 @@
+import os
+from typing import Optional, Tuple
+
+import gradio as gr
+from query_data import get_chain
+from threading import Lock
+import pinecone
+from langchain.vectorstores import Chroma, Pinecone
+from langchain.embeddings.openai import OpenAIEmbeddings
+
+embeddings = OpenAIEmbeddings()
+
+PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")  # hardcoded key redacted; set as a Space secret
+PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "asia-southeast1-gcp")
+
+# initialize pinecone
+pinecone.init(
+    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
+    environment=PINECONE_API_ENV  # next to api key in console
+)
+
+index_name = "twimbit-answer"
+vectorstore = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
+
+api_key = os.environ.get("OPENAI_API_KEY", "")  # hardcoded key redacted; set as a Space secret
+
+
+class ChatWrapper:
+
+    def __init__(self):
+        self.lock = Lock()
+
+    def __call__(
+            self, inp: str, history: Optional[Tuple[str, str]], chain
+    ):
+        """Execute the chat functionality."""
+        self.lock.acquire()
+        try:
+            history = history or []
+            # If chain is None, that is because no API key was provided.
+            # if chain is None:
+            #     history.append((inp, "Please paste your OpenAI key to use"))
+            #     return history, history
+            # Set OpenAI key
+
+            if api_key:
+                os.environ["OPENAI_API_KEY"] = api_key
+                chain = get_chain(vectorstore)
+                os.environ["OPENAI_API_KEY"] = ""
+
+            import openai
+            openai.api_key = api_key  # hardcoded key redacted; reuse the env-provided key
+            # Run chain and append input.
+            output = chain({"question": inp, "chat_history": history})["answer"]
+            history.append((inp, output))
+        except Exception as e:
+            raise e
+        finally:
+            self.lock.release()
+        return history, history
+
+
+chat = ChatWrapper()
+
+block = gr.Blocks(css=".gradio-container {background-color: #111827};footer "
+                      "{visibility: hidden};")
+
+with block:
+    # with gr.Row():
+    #     openai_api_key_textbox = gr.Textbox(
+    #         placeholder="sk-...",  # key redacted
+    #         show_label=False,
+    #         lines=1,
+    #         type="password",
+    #         value="sk-..."  # key redacted
+    #     )
+
+    chatbot = gr.Chatbot().style(height=500)
+
+    with gr.Row():
+        message = gr.Textbox(
+            label="What's your question?",
+            placeholder="Ask questions about reports",
+            lines=1,
+        )
+        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+
+    # gr.Examples(
+    #     examples=[
+    #         "What did the president say about Ketanji Brown Jackson",
+    #         "Did he mention Stephen Breyer?",
+    #         "What was his stance on Ukraine",
+    #     ],
+    #     inputs=message,
+    # )
+
+    state = gr.State()
+    agent_state = gr.State()
+    submit.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
+    message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
+
+    # openai_api_key_textbox.change(
+    #     set_openai_api_key,
+    #     inputs=[openai_api_key_textbox],
+    #     outputs=[agent_state],
+    # )
+
+# block.launch(debug=True)
+block.launch(debug=True, auth=('admin', 'password'), auth_message='enter username and password to proceed further')
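The commented-out wiring at the bottom of app.py references a set_openai_api_key helper that this commit never defines. In the upstream langchain chat template this app appears to be adapted from, the helper looks roughly like the sketch below (an assumption, not part of this commit); it also explains why ChatWrapper.__call__ sets and then immediately clears OPENAI_API_KEY:

# Hypothetical helper (not in this commit): what the commented-out
# openai_api_key_textbox.change(...) wiring would call. Names and shape are
# assumptions based on the upstream langchain chat template.
def set_openai_api_key(api_key: str):
    """Build the chain with the pasted key, then scrub the key from the env."""
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        chain = get_chain(vectorstore)  # ChatOpenAI captures the key at construction
        os.environ["OPENAI_API_KEY"] = ""
        return chain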
ingest_data.py
ADDED
@@ -0,0 +1,46 @@
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredFileLoader, CSVLoader
+from langchain.vectorstores.faiss import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma, Pinecone
+import pickle
+import pinecone
+import os  # for reading redacted secrets from the environment
+# Load Data
+# loader = UnstructuredFileLoader("output.md")
+# raw_documents = loader.load()
+loader = CSVLoader(file_path='./posts.csv', source_column="Post Title", encoding='utf-8')
+
+raw_documents = loader.load()
+
+# Split text
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=0)
+documents = text_splitter.split_documents(raw_documents)
+
+#
+# # Load Data to vectorstore
+embeddings = OpenAIEmbeddings()
+# vectorstore = FAISS.from_documents(documents, embeddings)
+
+# # Save vectorstore
+# with open("posts.pkl", "wb") as f:
+#     pickle.dump(vectorstore, f)
+
+PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")  # hardcoded key redacted
+PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "asia-southeast1-gcp")
+
+# initialize pinecone
+pinecone.init(
+    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
+    environment=PINECONE_API_ENV  # next to api key in console
+)
+
+index_name = "twimbit-answer"
+
+Pinecone.from_texts([t.page_content for t in documents], embeddings, index_name=index_name)
+
+# query = "How many neo banks are in india ?"
+#
+# docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
+#
+# docs = docsearch.similarity_search(query, include_metadata=True)
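A quick way to sanity-check the ingest, along the lines of the commented-out query above (a sketch, not part of the commit; assumes the same OpenAI and Pinecone credentials are configured):

# Sanity-check sketch: pull the freshly built index back and run one search.
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
docs = docsearch.similarity_search("How many neo banks are in India?", k=4)
print(docs[0].page_content[:200])  # should print a chunk from posts.csv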
query_data.py
ADDED
@@ -0,0 +1,30 @@
+from langchain.prompts.prompt import PromptTemplate
+from langchain.llms import OpenAI, OpenAIChat
+from langchain.chains import ChatVectorDBChain, ConversationalRetrievalChain
+from langchain.chat_models import ChatOpenAI
+
+_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a
+standalone question.
+
+
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+template = """You are a personal assistant for the twimbit company, answering questions. You are given the following
+extracted parts of a long document and a question. Provide a brief answer. If you don't know the answer, just say
+"I'm not sure." Question: {question} ========= {context} ========= Answer in Markdown: """
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+
+
+def get_chain(vectorstore):
+    llm = ChatOpenAI(temperature=0)
+    qa_chain = ConversationalRetrievalChain.from_llm(
+        llm,
+        vectorstore.as_retriever(search_kwargs={"k": 4})
+        # qa_prompt=QA_PROMPT,
+        # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+    )
+    return qa_chain
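Note that QA_PROMPT and CONDENSE_QUESTION_PROMPT are defined but left commented out in from_llm, so the chain falls back to LangChain's default prompts. Called standalone, the chain takes a question plus the accumulated history, mirroring what app.py does; a minimal sketch (assumes OPENAI_API_KEY is set and a vectorstore over the existing index, as built in app.py):

# Minimal usage sketch (assumptions: env keys are set and `vectorstore`
# wraps the existing "twimbit-answer" Pinecone index, as in app.py).
chain = get_chain(vectorstore)
history = []
result = chain({"question": "How many neo banks are in India?", "chat_history": history})
history.append(("How many neo banks are in India?", result["answer"]))
print(result["answer"])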
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+langchain~=0.0.123
+openai
+unstructured
+faiss-cpu
+gradio
+pinecone-client