SysModeler committed on
Commit
f979d1d
·
verified ·
1 Parent(s): e038228

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  faiss_index_sysml/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ Dataset/Lenny[[:space:]]Delligatti[[:space:]]-[[:space:]]SysML[[:space:]]Distilled[[:space:]][[:space:]]A[[:space:]]Brief[[:space:]]Guide[[:space:]]to[[:space:]]the[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language-Addison-Wesley[[:space:]]Professional[[:space:]](2013).pdf filter=lfs diff=lfs merge=lfs -text
38
+ Dataset/OMG[[:space:]]Systems[[:space:]]Modeling[[:space:]]Language[[:space:]](OMG[[:space:]]SysML).pdf filter=lfs diff=lfs merge=lfs -text
39
+ Dataset/The_SysML_Modelling_Language.pdf filter=lfs diff=lfs merge=lfs -text
Dataset/Lenny Delligatti - SysML Distilled A Brief Guide to the Systems Modeling Language-Addison-Wesley Professional (2013).pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adad4affd9427f87a0ec9217abf30bab0cef7c2cc438023665e66c90d5ed6f9d
3
+ size 6350490
Dataset/OMG Systems Modeling Language (OMG SysML).pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:633007f2b5e8257f40b0315732d5c0720c1e025cd6bf78dfd9c0aa43eaff23fe
3
+ size 3613524
Dataset/The_SysML_Modelling_Language.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406089eb1d4f67ca35cfa42cce377ad7435b30e5baa435cdd00847afdec635dd
3
+ size 233280
Dataset/sysmodeler_user_manual.pdf ADDED
Binary file (36.4 kB). View file
 
vdb_script/faiss_vdb_script.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import OpenAI
8
+ from dotenv import load_dotenv
9
+
10
# Load environment variable for OpenAI key
load_dotenv()

# The key is mandatory for everything that follows, so stop right away
# when it is unset or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("Missing OPENAI_API_KEY in environment variables.")
16
+
17
# Extract Data from the PDFs
def load_pdf_file(data_path):
    """Load the PDF files found in ``data_path``.

    A ``DirectoryLoader`` is built over ``data_path`` with the glob
    ``*.pdf`` and ``PyPDFLoader`` as the per-file loader class.

    Args:
        data_path: Directory that contains the PDF files.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(
        data_path,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
    ).load()
22
+
23
# Split the data into chunks
def text_split(docs):
    """Split ``docs`` into smaller chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=500`` and ``chunk_overlap=20``.

    Args:
        docs: Documents to split (e.g. the result of ``load_pdf_file``).

    Returns:
        The result of ``split_documents`` applied to ``docs``.
    """
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
    )
    pieces = chunker.split_documents(docs)
    return pieces
27
+
28
# Set up LLM and Embedding
# NOTE(review): "gpt-4o-mini" is a chat model while `OpenAI` is the
# completion-style wrapper -- confirm this pairing is intended before the
# commented-out QA chain further down is re-enabled.
llm = OpenAI(model_name="gpt-4o-mini", temperature=0.5, openai_api_key=OPENAI_API_KEY)
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Load PDF, chunk it, embed it, and store in FAISS
pdf_docs = load_pdf_file("/kaggle/input/rag-test")  # Update this to your PDF folder
chunks = text_split(pdf_docs)

# Building an index from zero chunks fails deep inside the vector store with
# an unhelpful error, so report the real cause (no usable PDFs) up front.
if not chunks:
    raise ValueError(
        "No text chunks were produced; check that the PDF folder exists "
        "and contains readable PDF files."
    )

# Embed every chunk and persist the index to the "faiss_index_sysml" folder.
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index_sysml")
38
+
39
+ # Load FAISS and create retriever QA chain
40
+ # new_vectorstore = FAISS.load_local("faiss_index_sysml", embeddings, allow_dangerous_deserialization=True)
41
+ # qa = RetrievalQA.from_chain_type(
42
+ # llm=llm,
43
+ # chain_type="stuff",
44
+ # retriever=new_vectorstore.as_retriever()
45
+ # )
46
+
47
+ # # Run a sample query
48
+ # query = "What is SysML used for?"
49
+ # print("User:", query)
50
+ # print("Bot:", qa.run(query))
vdb_script/requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.3.25
2
+ langchain-community==0.3.24
3
+ langchain-core==0.3.60
4
+ langchain-openai==0.3.17
5
+ openai==1.79.0
6
+ faiss-cpu==1.11.0
7
+ python-dotenv==1.1.0
8
+ gradio==4.15.0
9
+ gradio_client==0.8.1
10
+ huggingface_hub >= 0.19.3