Siddhant committed
Commit 1969750 · unverified · 1 Parent(s): acbe0e7

initial commit

Files changed (5)
  1. .gitignore +129 -0
  2. app.py +109 -0
  3. ingest_data.py +46 -0
  4. query_data.py +30 -0
  5. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,129 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
app.py ADDED
@@ -0,0 +1,109 @@
+ import os
+ from typing import Optional, Tuple
+
+ import gradio as gr
+ from query_data import get_chain
+ from threading import Lock
+ import pinecone
+ from langchain.vectorstores import Chroma, Pinecone
+ from langchain.embeddings.openai import OpenAIEmbeddings
+
+ embeddings = OpenAIEmbeddings()
+
+ PINECONE_API_KEY = '6af52b8a-a3df-4189-899b-b21163027bb8'
+ PINECONE_API_ENV = 'asia-southeast1-gcp'
+
+ # initialize pinecone
+ pinecone.init(
+     api_key=PINECONE_API_KEY,  # find at app.pinecone.io
+     environment=PINECONE_API_ENV  # next to api key in console
+ )
+
+ index_name = "twimbit-answer"
+ vectorstore = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
+
+ api_key = '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'
+
+
+ class ChatWrapper:
+
+     def __init__(self):
+         self.lock = Lock()
+
+     def __call__(
+             self, inp: str, history: Optional[Tuple[str, str]], chain
+     ):
+         """Execute the chat functionality."""
+         self.lock.acquire()
+         try:
+             history = history or []
+             # If chain is None, that is because no API key was provided.
+             # if chain is None:
+             #     history.append((inp, "Please paste your OpenAI key to use"))
+             #     return history, history
+             # Set OpenAI key
+
+             if api_key:
+                 os.environ["OPENAI_API_KEY"] = api_key
+                 chain = get_chain(vectorstore)
+                 os.environ["OPENAI_API_KEY"] = ""
+
+             import openai
+             openai.api_key = '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'
+             # Run chain and append input.
+             output = chain({"question": inp, "chat_history": history})["answer"]
+             history.append((inp, output))
+         except Exception as e:
+             raise e
+         finally:
+             self.lock.release()
+         return history, history
+
+
+ chat = ChatWrapper()
+
+ block = gr.Blocks(css=".gradio-container {background-color: #111827};footer "
+                       "{visibility: hidden};")
+
+ with block:
+     # with gr.Row():
+     #     openai_api_key_textbox = gr.Textbox(
+     #         placeholder="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",
+     #         show_label=False,
+     #         lines=1,
+     #         type="password",
+     #         value="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"
+     #     )
+
+     chatbot = gr.Chatbot().style(height=500)
+
+     with gr.Row():
+         message = gr.Textbox(
+             label="What's your question?",
+             placeholder="Ask questions about reports",
+             lines=1,
+         )
+         submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+
+     # gr.Examples(
+     #     examples=[
+     #         "What did the president say about Kentaji Brown Jackson",
+     #         "Did he mention Stephen Breyer?",
+     #         "What was his stance on Ukraine",
+     #     ],
+     #     inputs=message,
+     # )
+
+     state = gr.State()
+     agent_state = gr.State()
+     submit.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
+     message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
+
+     # openai_api_key_textbox.change(
+     #     set_openai_api_key,
+     #     inputs=[openai_api_key_textbox],
+     #     outputs=[agent_state],
+     # )
+
+ # block.launch(debug=True)
+ block.launch(debug=True, auth=('admin', 'password'), auth_message='enter username password to proceed further')
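Note that app.py bakes the Pinecone and OpenAI credentials directly into the source. Below is a minimal sketch (not the committed code) of the same initialization driven by environment variables instead; it assumes `OPENAI_API_KEY`, `PINECONE_API_KEY`, and `PINECONE_API_ENV` are exported before launch and reuses `get_chain` from query_data.py against the existing `twimbit-answer` index.

```python
# Sketch only: same wiring as app.py, but credentials come from the environment
# rather than hard-coded strings.
import os

import pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

from query_data import get_chain

# Assumed to be set beforehand, e.g. `export PINECONE_API_KEY=...`
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_API_ENV"],
)

embeddings = OpenAIEmbeddings()  # picks up OPENAI_API_KEY from the environment
vectorstore = Pinecone.from_existing_index(index_name="twimbit-answer", embedding=embeddings)
chain = get_chain(vectorstore)

# Same call shape ChatWrapper uses:
history = []
answer = chain({"question": "How many neobanks are in India?", "chat_history": history})["answer"]
history.append(("How many neobanks are in India?", answer))
```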
ingest_data.py ADDED
@@ -0,0 +1,46 @@
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import UnstructuredFileLoader, CSVLoader
+ from langchain.vectorstores.faiss import FAISS
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.vectorstores import Chroma, Pinecone
+ import pickle
+ import pinecone
+
+ # Load Data
+ # loader = UnstructuredFileLoader("output.md")
+ # raw_documents = loader.load()
+ loader = CSVLoader(file_path='./posts.csv', source_column="Post Title", encoding='utf-8')
+
+ raw_documents = loader.load()
+
+ # Split text
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=0)
+ documents = text_splitter.split_documents(raw_documents)
+
+ #
+ # # Load Data to vectorstore
+ embeddings = OpenAIEmbeddings()
+ # vectorstore = FAISS.from_documents(documents, embeddings)
+
+ # # Save vectorstore
+ # with open("posts.pkl", "wb") as f:
+ #     pickle.dump(vectorstore, f)
+
+ PINECONE_API_KEY = '6af52b8a-a3df-4189-899b-b21163027bb8'
+ PINECONE_API_ENV = 'asia-southeast1-gcp'
+
+ # initialize pinecone
+ pinecone.init(
+     api_key=PINECONE_API_KEY,  # find at app.pinecone.io
+     environment=PINECONE_API_ENV  # next to api key in console
+ )
+
+ index_name = "twimbit-answer"
+
+ Pinecone.from_texts([t.page_content for t in documents], embeddings, index_name=index_name)
+
+ # query = "How many neo banks are in india ?"
+ #
+ # docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
+ #
+ # docs = docsearch.similarity_search(query, include_metadata=True)
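One detail worth flagging in the ingestion step: `Pinecone.from_texts` uploads only the raw `page_content` strings, so the per-row metadata `CSVLoader` attaches (the "Post Title" source column and row number) is not stored in the index. A hedged alternative, reusing the `documents` and `embeddings` objects built above, would upsert the Document objects themselves so the metadata comes back with each hit:

```python
# Sketch only (assumes pinecone.init(...) has run and `documents`/`embeddings`
# are the objects created earlier in this script).
from langchain.vectorstores import Pinecone

index_name = "twimbit-answer"

# from_documents keeps each chunk's metadata (source, row) alongside the text
docsearch = Pinecone.from_documents(documents, embeddings, index_name=index_name)

# The metadata is then available on every search result:
docs = docsearch.similarity_search("How many neo banks are in India?", k=4)
for doc in docs:
    print(doc.metadata.get("source"), "->", doc.page_content[:80])
```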
query_data.py ADDED
@@ -0,0 +1,30 @@
+ from langchain.prompts.prompt import PromptTemplate
+ from langchain.llms import OpenAI, OpenAIChat
+ from langchain.chains import ChatVectorDBChain, ConversationalRetrievalChain
+ from langchain.chat_models import ChatOpenAI
+
+ _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a
+ standalone question.
+
+
+ Chat History:
+ {chat_history}
+ Follow Up Input: {question}
+ Standalone question:"""
+ CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+ template = """You are a personal assistance for twimbit company for answering questions. You are given the following
+ extracted parts of a long document and a question. Provide a brief answer. If you don't know the answer, just say "
+ I'm not sure." Question: {question} ========= {context} ========= Answer in Markdown: """
+ QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+
+
+ def get_chain(vectorstore):
+     llm = ChatOpenAI(temperature=0)
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm,
+         vectorstore.as_retriever(search_kwargs={"k": 4})
+         # qa_prompt=QA_PROMPT,
+         # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+     )
+     return qa_chain
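As committed, `get_chain` builds the ConversationalRetrievalChain with the default prompts; `QA_PROMPT` and `CONDENSE_QUESTION_PROMPT` defined above are left commented out. A sketch of enabling them is below, using the same keyword arguments the commented-out lines reference (these kwargs belong to the langchain 0.0.x line pinned in requirements.txt; later releases moved them). The helper name and usage lines are illustrative, not part of the commit.

```python
# Sketch only: same chain as get_chain, with the custom prompts switched on.
def get_chain_with_prompts(vectorstore):
    llm = ChatOpenAI(temperature=0)
    return ConversationalRetrievalChain.from_llm(
        llm,
        vectorstore.as_retriever(search_kwargs={"k": 4}),
        qa_prompt=QA_PROMPT,  # answer with the twimbit persona defined above
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,  # rewrite follow-ups as standalone questions
    )

# Usage mirrors app.py:
# chain = get_chain_with_prompts(vectorstore)
# result = chain({"question": "How many neo banks are in India?", "chat_history": []})
# print(result["answer"])
```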
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ langchain~=0.0.123
+ openai
+ unstructured
+ faiss-cpu
+ gradio
+ pinecone-client