# COFFEE-DEMO / app.py
import streamlit as st
from langchain_community.llms import OpenAI
import argparse
from datasets import load_dataset
import yaml
from tqdm import tqdm
import re
def load_data(split="test"):
    data = load_dataset("bigcode/humanevalpack")
    print("=========== dataset statistics ===========")
    print(len(data[split]))
    print("==========================================")
    return data[split]
def split_function_header_and_docstring(s):
    # Separate a HumanEval prompt into the function header and its docstring.
    pattern = re.compile(r"(\"\"\"(.*?)\"\"\"|\'\'\'(.*?)\'\'\')", re.DOTALL)
    match = pattern.findall(s)
    if match:
        # findall returns tuples of groups; index 0 of the last tuple is the full quoted docstring.
        docstring = match[-1][0]
        code_without_docstring = s.replace(docstring, "").replace('"' * 6, "").strip()
        docstring = docstring.replace('"', "")
    else:
        raise ValueError(f"No docstring found in prompt:\n{s}")
    return code_without_docstring, docstring
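# Illustrative example (not taken from the dataset): for a prompt such as
#   def add(a, b):
#       """Return the sum of a and b.
#       >>> add(1, 2)
#       3
#       """
# split_function_header_and_docstring returns roughly
#   ("def add(a, b):", "Return the sum of a and b.\n    >>> add(1, 2)\n    3\n    ")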
def prepare_model_input(code_data):
    # Build the critic prompt from a HumanEvalPack example.
    prompt = """Provide feedback on the errors in the given code and suggest the
correct code to address the described problem.
Problem Description:
{description}
Incorrect Code:
{wrong_code}"""
    description = code_data["prompt"]
    function_header, docstring = split_function_header_and_docstring(description)
    # Keep only the natural-language part of the docstring (drop the doctest examples).
    problem = docstring.split(">>>")[0]
    wrong_code = function_header + code_data["buggy_solution"]
    template_dict = {"function_header": function_header, "description": problem, "wrong_code": wrong_code}
    model_input = prompt.format(**template_dict)
    return model_input, problem, function_header
def load_and_prepare_data():
    # Precompute the critic model input for every problem in the dataset.
    dataset = load_data()
    all_model_inputs = {}
    print("### load and prepare data")
    for data in tqdm(dataset):
        problem_id = data["task_id"]
        buggy_solution = data["buggy_solution"]
        model_input, problem, function_header = prepare_model_input(data)
        # The prompt actually sent to the critic is rebuilt here from the parts above.
        new_model_input = (
            "Provide feedback on the errors in the given code and suggest the correct code "
            "to address the described problem.\n"
            f"Problem Description:{problem}\nIncorrect Code:\n{buggy_solution}\nFeedback:"
        )
        all_model_inputs[problem_id] = {
            "model_input": new_model_input,
            "header": function_header,
            "problem_description": problem,
            "data": data,
        }
    return all_model_inputs
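# Illustrative shape of the stored critic prompt (values shortened):
#   Provide feedback on the errors in the given code and suggest the correct code to address the described problem.
#   Problem Description:<docstring text before the first ">>>">
#   Incorrect Code:
#   <buggy_solution>
#   Feedback: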
dataset = load_dataset("bigcode/humanevalpack", split='test', trust_remote_code=True) # Ensuring consistent split usage
problem_ids = [problem['task_id'] for problem in dataset]
all_model_inputs = load_and_prepare_data()
# Ports are read from the command line; the defaults are placeholders for this demo.
parser = argparse.ArgumentParser()
parser.add_argument("--editor_port", type=str, default="6000")
parser.add_argument("--critic_port", type=str, default="6001")
# When launching with `streamlit run app.py`, script arguments can be passed after `--`.
args = parser.parse_args()
# Initialize LangChain OpenAI-compatible clients for the editor and critic models.
# api_key="EMPTY" is a placeholder because the hosted endpoints do not require authentication;
# the hardcoded ngrok URLs are used here, so the port arguments above only matter if you
# switch to a locally served endpoint such as the commented-out critic URL below.
editor_model = OpenAI(model="Anonymous-COFFEE/COFFEEPOTS-editor", api_key="EMPTY", openai_api_base="https://editor.jp.ngrok.io/v1")
# critic_model = OpenAI(model="Anonymous-COFFEE/COFFEEPOTS-critic", api_key="EMPTY", openai_api_base=f"http://localhost:{args.critic_port}/v1")
critic_model = OpenAI(model="Anonymous-COFFEE/COFFEEPOTS-critic", api_key="EMPTY", openai_api_base="https://critic.jp.ngrok.io/v1")
st.title("Demo for COFFEEPOTS")
selected_task_id = st.selectbox("Select a problem ID:", problem_ids)
# Retrieve selected problem details
problem_details = dataset[problem_ids.index(selected_task_id)]
st.write(f"**Selected Problem ID:** {problem_details['task_id']}")
st.write(f"**Problem Description:**\n{all_model_inputs[selected_task_id]['problem_description']}")
# Display buggy code with syntax highlighting
st.code(problem_details['buggy_solution'], language='python')
# Placeholders that are filled with streamed model output once the button below is pressed.
status_text = st.empty()
code_output = st.code("", language="python")
def generate_feedback():
    # Stream the critic model's feedback for the selected problem.
    return critic_model.stream(input=all_model_inputs[selected_task_id]["model_input"], logit_bias=None)


def generate_corrected_code():
    # Stream the corrected code from the editor model, wrapped in a Markdown code fence.
    yield "```python"
    for text_chunk in editor_model.stream(input=f"[INST]Buggy Code:\n{problem_details['buggy_solution']}\nFeedback: {feedback}[/INST]", logit_bias=None):
        yield text_chunk  # each chunk is a piece of the corrected code
    yield "```"
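# Note: generate_corrected_code reads the module-level `feedback` variable, which is
# assigned in the button handler below before the generator is consumed.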
if st.button("Generate Feedback and Corrected Code"):
    with st.spinner("Generating feedback..."):
        # Stream feedback from the critic model into the status placeholder.
        print("model input for critic:")
        print(all_model_inputs[selected_task_id]["model_input"])
        feedback = status_text.write_stream(generate_feedback())
    with st.spinner("Generating corrected code..."):
        # Stream the corrected code from the editor model into the code placeholder.
        corrected_code = code_output.write_stream(generate_corrected_code())