from transformers import pipeline
from huggingface_hub import InferenceClient
import os

system_messages = {
    "STRICT": """You are a chatbot evaluating GitHub repositories, their Python code and corresponding README files.
Keep in mind that the code you are provided is only one of many files in the repository.
Strictly answer the questions with "Yes" or "No".
Don't use any punctuation either.""",
    "HELP": """You are a chatbot evaluating GitHub repositories, their Python code and corresponding README files.
Please help me answer the following question.
Keep your answers short and informative.
Your answer should be a single paragraph.""",
    "PITFALL": """You are a chatbot evaluating GitHub repositories, their Python code and corresponding README files.
You are looking for common pitfalls in the code.
Keep in mind that the code you are provided is only one of many files in the repository.
Keep your answer short and informative.
Only report serious flaws. If you don't find any, don't mention them.
Answer using only a single, short paragraph.
Only point out pitfalls if you are certain about them!
Pitfall #1: Design flaws with regard to the data collection in the code.
Pitfall #2: Dataset shift (e.g. sampling bias, imbalanced populations, imbalanced labels, non-stationary environments).
Pitfall #3: Confounders.
Pitfall #4: Measurement errors (labelling mistakes, noisy measurements, inappropriate proxies).
Pitfall #5: Historical biases in the data used.
Pitfall #6: Information leaking between the training and testing data.
Pitfall #7: Model-problem mismatch (e.g. over-complicated/simplistic model, computational challenges).
Pitfall #8: Overfitting in the code (e.g. high variance, high complexity, low bias).
Pitfall #9: Misused metrics in the code (e.g. poor metric selection, poor implementations).
Pitfall #10: Black-box models in the code (e.g. lack of interpretability, lack of transparency).
Pitfall #11: Baseline comparison issues (e.g. if the testing data does not fit the training data).
Pitfall #12: Insufficient reporting in the code (e.g. missing hyperparameters, missing evaluation metrics).
Pitfall #13: Faulty interpretations of the reported results.""",
}


class LocalLLM:
    """Runs a chat model locally through the transformers text-generation pipeline."""

    def __init__(self, model_name):
        # device=0 assumes a GPU is available; pad_token_id=128001 matches
        # Llama-3-style tokenizers (adjust for other model families).
        self.pipe = pipeline("text-generation", model=model_name,
                             max_new_tokens=1000, device=0, pad_token_id=128001)

    def predict(self, response_type, prompt):
        messages = [
            {"role": "system", "content": system_messages[response_type]},
            {"role": "user", "content": prompt},
        ]
        # The pipeline echoes the whole conversation; return only the
        # assistant's reply, stripped of surrounding whitespace.
        res = self.pipe(messages)
        res = res[0]["generated_text"]
        res = [response for response in res if response["role"] == "assistant"][0]["content"]
        return res.strip()
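
# A minimal usage sketch (the model name below is an assumption, not part of
# this file; any chat-tuned model supported by the pipeline should work):
#
#     llm = LocalLLM("meta-llama/Meta-Llama-3-8B-Instruct")
#     answer = llm.predict("STRICT", "Does this repository document its dependencies?\n" + readme_text)
#     print(answer)  # expected: "Yes" or "No"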


class RemoteLLM:
    """Calls a hosted model through the Hugging Face Inference API."""

    def __init__(self, model_name):
        # Reads the API token from the hfToken environment variable
        # (e.g. a Space secret); requests fail if it is missing.
        token = os.getenv("hfToken")
        self.model_name = model_name
        self.client = InferenceClient(api_key=token)

    def predict(self, response_type, prompt):
        message = self.client.chat_completion(
            model=self.model_name, max_tokens=500, stream=False,
            messages=[{"role": "system", "content": system_messages[response_type]},
                      {"role": "user", "content": prompt}])
        return message["choices"][0]["message"]["content"]
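
# A hedged, runnable demo. Assumptions: the hfToken environment variable is
# set, and the model named below is served by the Inference API; it is an
# example choice, not taken from the original file.
if __name__ == "__main__":
    sample_code = "import pandas as pd\ndf = pd.read_csv('data.csv')\nprint(df.describe())"
    llm = RemoteLLM("meta-llama/Meta-Llama-3-8B-Instruct")
    print(llm.predict("PITFALL", "Code under review:\n" + sample_code))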