File size: 6,797 Bytes
c02b4bf
 
 
 
6113980
b26b0a3
1c581ef
 
c02b4bf
6113980
 
07026cb
f06025d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138851d
07026cb
 
 
 
138851d
a880370
07026cb
 
 
 
 
 
 
 
2742bc2
02412d9
6113980
 
 
1c581ef
6113980
1c581ef
6113980
 
d3ae86a
1c581ef
 
c02b4bf
d3ae86a
c02b4bf
 
 
 
1c581ef
c02b4bf
 
1c581ef
c02b4bf
1c581ef
c02b4bf
1c581ef
c02b4bf
1c581ef
 
b518cbb
7502cc2
b518cbb
 
 
 
 
 
 
 
 
 
c02b4bf
1c581ef
 
 
c02b4bf
 
 
 
 
 
 
 
 
 
 
1c581ef
 
 
 
c02b4bf
1c581ef
 
 
c02b4bf
1c581ef
 
 
 
 
c02b4bf
 
 
 
4dfdae4
1c581ef
c02b4bf
 
1c581ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c02b4bf
5efc135
 
 
1c581ef
 
c02b4bf
1c581ef
5efc135
1c581ef
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import os
import gradio as gr
import json
from huggingface_hub import InferenceClient
import gspread
from google.oauth2 import service_account
from datetime import datetime
import chromadb

# Google Sheets setup
# Builds service-account credentials from environment variables and opens the
# first worksheet of the "nestolechatbot" spreadsheet for chat logging.
# OAuth scopes requested for the service account (Sheets feed + Drive).
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
# key1 is used as the private key id; key2..key28 are joined with newlines
# below to form the body of the PEM private key.
# NOTE: os.getenv returns None for an unset variable, in which case the string
# concatenation that builds `pkey` raises a TypeError at import time.
key1 = os.getenv("key1")
key2 = os.getenv("key2")
key3 = os.getenv("key3")
key4 = os.getenv("key4")
key5 = os.getenv("key5")
key6 = os.getenv("key6")
key7 = os.getenv("key7")
key8 = os.getenv("key8")
key9 = os.getenv("key9")
key10 = os.getenv("key10")
key11 = os.getenv("key11")
key12 = os.getenv("key12")
key13 = os.getenv("key13")
key14 = os.getenv("key14")
key15 = os.getenv("key15")
key16 = os.getenv("key16")
key17 = os.getenv("key17")
key18 = os.getenv("key18")
key19 = os.getenv("key19")
key20 = os.getenv("key20")
key21 = os.getenv("key21")
key22 = os.getenv("key22")
key23 = os.getenv("key23")
key24 = os.getenv("key24")
key25 = os.getenv("key25")
key26 = os.getenv("key26")
key27 = os.getenv("key27")
key28 = os.getenv("key28")
# Reassemble the PEM block: header, one key fragment per line, footer.
# NOTE(review): key23 is read above but is NOT part of this concatenation
# (key22 is followed directly by key24) - confirm whether that is intentional.
pkey="-----BEGIN PRIVATE KEY-----\n"+key2+"\n"+key3+"\n"+ key4+"\n"+key5+"\n"+ key6+"\n"+key7+"\n"+key8+"\n"+key9+"\n"+key10+"\n"+key11+"\n"+key12+"\n"+key13+"\n"+key14+"\n"+key15+"\n"+key16+"\n"+key17+"\n"+key18+"\n"+key19+"\n"+key20+"\n"+key21+"\n"+key22+"\n"+key24+"\n"+key25+"\n"+key26+"\n"+key27+"\n"+key28+"\n-----END PRIVATE KEY-----\n"
# Service-account info dict in the layout of a Google service-account key file.
# NOTE(review): "client_email" appears twice with the same value; the second
# entry simply overwrites the first and is redundant.
json_data={
  "type": "service_account",
  "project_id": "nestolechatbot",
  "private_key_id": key1,
  "private_key": pkey,
  "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
  "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
  "client_id": "107457262210035412036",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/nestoleservice%40nestolechatbot.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}
creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

# NOTE: the name `client` is rebound further down to the ChromaDB client;
# `sheet` keeps its own reference to the worksheet, so logging still works.
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1  # First worksheet of the spreadsheet

def save_to_sheet(date, name, message):
    """Append one row to the Google Sheet and return a confirmation text.

    Args:
        date: Value written to the first column.
        name: Value written to the second column; also echoed in the
            returned confirmation.
        message: Value written to the third column.

    Returns:
        A short confirmation string mentioning ``name``.
    """
    row = [date, name, message]
    sheet.append_row(row)
    confirmation = f"Thanks {name}, your message has been saved!"
    return confirmation

# Locate the persisted Chroma store: try the first path (presumably a local
# development checkout) and fall back to the deployment path when it is absent.
path='/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
if not os.path.exists(path):
    path = "/home/user/app/chromaTS"

print(path)
# NOTE: this rebinds `client`, which previously held the gspread client.
client = chromadb.PersistentClient(path=path)
print(client.heartbeat())  # startup diagnostics printed to stdout
print(client.get_version())  # startup diagnostics printed to stdout
print(client.list_collections())  # startup diagnostics printed to stdout

from chromadb.utils import embedding_functions
# `default_ef` is not referenced anywhere else in the visible code.
default_ef = embedding_functions.DefaultEmbeddingFunction()
# Embedding function passed to the collection lookup below.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")

# Collection queried by response() for retrieval.
collection = client.get_collection(name="chromaTS", embedding_function=sentence_transformer_ef)

# Hosted text-generation model used by response().
inference_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Last page URL handed to submit_url(); response() only prints it.
global_url = ""

def format_prompt(message, history):
    """Build an ``[INST] ... [/INST]`` prompt from the message and the last turn.

    Only the most recent history entry is used as context. An entry whose
    user or bot part is ``None`` is skipped: the chat UI seeds the history
    with ``[None, welcome_text]``, and formatting that entry would inject the
    literal text "None" into the prompt (and into the retrieval query that
    is built from it).

    Args:
        message: Text of the current user request.
        history: Sequence of ``(user_message, bot_response)`` pairs; may be
            empty or ``None``.

    Returns:
        ``"[INST] <user> [/INST] <bot></s> [INST] <message> [/INST]"`` when a
        complete previous turn exists, otherwise ``"[INST] <message> [/INST]"``.
    """
    print("HISTORY")
    print(history)
    prompt = ""
    if history:
        user_prompt, bot_response = history[-1]
        # Skip incomplete turns (e.g. the pre-seeded welcome message) instead
        # of rendering None as the string "None".
        if user_prompt is not None and bot_response is not None:
            prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    print("Final P")
    print(prompt)
    return prompt

def response(prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
    """Stream a retrieval-augmented answer for ``prompt`` and log the exchange.

    The prompt (plus the last history turn) is used to query the Chroma
    collection; the retrieved documents are appended to a fixed German system
    instruction, and the combined text is sent to the inference client. The
    growing answer is yielded after every received token. Once streaming is
    finished, the prompt and the full answer are written to the Google Sheet.

    Args:
        prompt: Current user message.
        history: Chat history as passed on to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Maximum number of tokens to generate.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Repetition penalty forwarded to the model.

    Yields:
        The answer text accumulated so far, and the complete answer once more
        after it has been saved.
    """
    global global_url
    print(f"Working with URL: {global_url}")  # You can use the URL here
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    search_prompt = format_prompt(prompt, history)
    results = collection.query(
        query_texts=[search_prompt],
        n_results=60,
    )
    documents = results['documents'][0]
    # Default to no database excerpts so `addon` is always defined; previously
    # a query returning fewer than two documents raised UnboundLocalError.
    addon = ""
    if len(documents) > 1:
        addon = "Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(documents)
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
    formatted_prompt = format_prompt(system + "\n" + prompt, history)
    stream = inference_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    # Named `chunk` rather than `response` to avoid shadowing this function.
    for chunk in stream:
        output += chunk.token.text
        yield output
    now = str(datetime.now())
    save_to_sheet(now, prompt, output)
    # Final yield keeps the complete answer as the last emitted value, even
    # when the stream produced no tokens.
    yield output

def js_code():
    """Return the HTML/JS snippet that reports the page URL to the server.

    The snippet defines ``getUrl``, which POSTs ``{"url": <window.location.href>}``
    as JSON to ``/submit_url`` and logs to the browser console on HTTP 200, and
    installs it as the ``window.onload`` handler.
    """
    snippet_lines = (
        '',
        '    <script>',
        '        function getUrl() {',
        '            const url = window.location.href;',
        '            const xhr = new XMLHttpRequest();',
        '            xhr.open("POST", "/submit_url", true);',
        '            xhr.setRequestHeader("Content-Type", "application/json");',
        '            xhr.onreadystatechange = function() {',
        '                if (xhr.readyState === 4 && xhr.status === 200) {',
        '                    console.log("URL submitted successfully");',
        '                }',
        '            };',
        '            xhr.send(JSON.stringify({ url: url }));',
        '        }',
        '        window.onload = getUrl;',
        '    </script>',
        '    ',
    )
    return "\n".join(snippet_lines)

def submit_url(url: str):
    """Remember ``url`` in the module-level ``global_url`` and echo it back.

    Args:
        url: Page URL to store.

    Returns:
        The same URL that was passed in.
    """
    global global_url
    print(f"Received URL: {url}")
    # Keep the most recent URL available to the rest of the module.
    global_url = url
    return global_url

# Chat UI: response() is the streaming handler; the Chatbot component is
# pre-seeded with one row [None, <welcome text>], i.e. a bot message without a
# preceding user message.
chatbot = gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(value=[[None, "Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]], render_markdown=True),
    title="German Studyhelper Chätti"
)

# Start the app with share=True and the URL-reporting snippet from js_code().
# NOTE(review): the /submit_url route that the snippet posts to is only
# declared after this call - confirm it is actually registered while serving.
chatbot.launch(share=True, js=js_code())

# Intended HTTP endpoint for the POST request issued by the js_code() snippet.
# NOTE(review): this declaration is executed only after chatbot.launch() has
# returned, so the route may not exist while the app is serving - TODO confirm.
# NOTE(review): verify that `gr.routes` really exposes a `post` decorator in
# the installed Gradio version; if not, this line raises AttributeError.
@gr.routes.post("/submit_url")
async def process_url(request):
    """Store the page URL sent by the browser.

    Reads the JSON body of ``request``, takes its "url" field (empty string
    when the field is missing), passes it to ``submit_url`` and returns a
    small status dict.
    """
    data = await request.json()
    url = data.get("url", "")
    submit_url(url)
    return {"status": "success"}

print("Interface up and running!")