Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- .gitattributes +3 -0
- Dataset/Lenny Delligatti - SysML Distilled A Brief Guide to the Systems Modeling Language-Addison-Wesley Professional (2013).pdf +3 -0
- Dataset/OMG Systems Modeling Language (OMG SysML).pdf +3 -0
- Dataset/The_SysML_Modelling_Language.pdf +3 -0
- Dataset/sysmodeler_user_manual.pdf +0 -0
- vdb_script/faiss_vdb_script.py +50 -0
- vdb_script/requirements.txt +10 -0
.gitattributes
CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
|
37 |
+
Dataset/Lenny[[:space:]]Delligatti[[:space:]]-[[:space:]]SysML[[:space:]]Distilled[[:space:]][[:space:]]A[[:space:]]Brief[[:space:]]Guide[[:space:]]to[[:space:]]the[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language-Addison-Wesley[[:space:]]Professional[[:space:]](2013).pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
+
Dataset/OMG[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language[[:space:]](OMG[[:space:]]SysML).pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
+
Dataset/The_SysML_Modelling_Language.pdf filter=lfs diff=lfs merge=lfs -text
|
Dataset/Lenny Delligatti - SysML Distilled A Brief Guide to the Systems Modeling Language-Addison-Wesley Professional (2013).pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adad4affd9427f87a0ec9217abf30bab0cef7c2cc438023665e66c90d5ed6f9d
|
3 |
+
size 6350490
|
Dataset/OMG Systems Modeling Language (OMG SysML).pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:633007f2b5e8257f40b0315732d5c0720c1e025cd6bf78dfd9c0aa43eaff23fe
|
3 |
+
size 3613524
|
Dataset/The_SysML_Modelling_Language.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406089eb1d4f67ca35cfa42cce377ad7435b30e5baa435cdd00847afdec635dd
|
3 |
+
size 233280
|
Dataset/sysmodeler_user_manual.pdf
ADDED
Binary file (36.4 kB). View file
|
|
vdb_script/faiss_vdb_script.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

# NOTE(review): the original imported from the legacy `langchain.*` paths
# (langchain.document_loaders, langchain.embeddings.openai, langchain.llms),
# which were removed in langchain 0.2+. requirements.txt pins
# langchain==0.3.25 / langchain-community==0.3.24 / langchain-openai==0.3.17,
# so those imports raise ImportError at startup. The canonical locations
# below match the pinned packages.
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variable for OpenAI key (reads a local .env file).
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fail fast: both the embedding model and the LLM below need this key.
    raise ValueError("Missing OPENAI_API_KEY in environment variables.")
# Extract Data from the PDFs
def load_pdf_file(data_path):
    """Load every PDF found directly under *data_path*.

    Uses ``DirectoryLoader`` with a ``*.pdf`` glob (non-recursive) and
    ``PyPDFLoader`` per file, returning one document per PDF page.
    """
    pdf_loader = DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

# Split the data into chunks
def text_split(docs, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping character chunks for embedding.

    Args:
        docs: Documents as returned by ``load_pdf_file``.
        chunk_size: Maximum characters per chunk. Defaults to 500, the
            value previously hard-coded here, so existing callers are
            unaffected.
        chunk_overlap: Characters shared between adjacent chunks
            (default 20, as before).

    Returns:
        The list of chunked documents.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(docs)

# Set up LLM and Embedding
# NOTE(review): "gpt-4o-mini" is a chat-completions model; the
# completion-style `OpenAI` wrapper may reject it — `ChatOpenAI` from
# langchain_openai is probably intended. Confirm before relying on `llm`
# (it is only used by the commented-out QA chain below).
llm = OpenAI(model_name="gpt-4o-mini", temperature=0.5, openai_api_key=OPENAI_API_KEY)
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Load PDF, chunk it, embed it, and store in FAISS
pdf_docs = load_pdf_file("/kaggle/input/rag-test")  # Update this to your PDF folder
chunks = text_split(pdf_docs)

# Embed every chunk (one OpenAI embeddings call batch per chunk set) and
# persist the resulting FAISS index to ./faiss_index_sysml.
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index_sysml")

# Load FAISS and create retriever QA chain
# (kept as a usage example for consumers of the saved index)
# new_vectorstore = FAISS.load_local("faiss_index_sysml", embeddings, allow_dangerous_deserialization=True)
# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=new_vectorstore.as_retriever()
# )

# # Run a sample query
# query = "What is SysML used for?"
# print("User:", query)
# print("Bot:", qa.run(query))
vdb_script/requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.3.25
|
2 |
+
langchain-community==0.3.24
|
3 |
+
langchain-core==0.3.60
|
4 |
+
langchain-openai==0.3.17
|
5 |
+
openai==1.79.0
|
6 |
+
faiss-cpu==1.11.0
|
7 |
+
python-dotenv==1.1.0
|
8 |
+
gradio==4.15.0
|
9 |
+
gradio_client==0.8.1
|
10 |
+
huggingface_hub>=0.19.3
|