merterbak committed
Commit ae0ab06 · verified · 1 Parent(s): 0c86c79

Harmony attempt #1 blended with simple formatting

Files changed (1): app.py (+51 -7)
app.py CHANGED

```diff
@@ -4,6 +4,13 @@ from threading import Thread
 import gradio as gr
 import spaces
 import re
+from openai_harmony import (
+    load_harmony_encoding,
+    HarmonyEncodingName,
+    Role,
+    Message,
+    Conversation,
+)
 
 model_id = "openai/gpt-oss-20b"
 
@@ -12,7 +19,9 @@ pipe = pipeline(
     model=model_id,
     torch_dtype="auto",
     device_map="auto",
+    trust_remote_code=True,
 )
+enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
 def format_conversation_history(chat_history):
     messages = []
     for item in chat_history:
@@ -22,6 +31,34 @@ def format_conversation_history(chat_history):
         content = content[0]["text"] if content and "text" in content[0] else str(content)
         messages.append({"role": role, "content": content})
     return messages
+# OpenAI's harmony format
+def build_harmony_conversation_from_messages(messages):
+    harmony_messages = []
+    for m in messages:
+        role = m["role"].lower()
+        content = m["content"]
+        if role == "system":
+            harmony_messages.append(
+                Message.from_role_and_content(
+                    Role.SYSTEM,
+                    content,
+                )
+            )
+        elif role == "user":
+            harmony_messages.append(
+                Message.from_role_and_content(
+                    Role.USER,
+                    content,
+                )
+            )
+        elif role == "assistant":
+            harmony_messages.append(
+                Message.from_role_and_content(
+                    Role.ASSISTANT,
+                    content,
+                )
+            )
+    return Conversation.from_messages(harmony_messages)
 
 @spaces.GPU()
 def generate_response(input_data, chat_history, max_new_tokens, system_prompt, temperature, top_p, top_k, repetition_penalty):
@@ -29,7 +66,12 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
     system_message = [{"role": "system", "content": system_prompt}] if system_prompt else []
     processed_history = format_conversation_history(chat_history)
     messages = system_message + processed_history + [new_message]
+    conversation = build_harmony_conversation_from_messages(messages)
+    prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
+    prompt_text = pipe.tokenizer.decode(prompt_tokens, skip_special_tokens=False)
+
     streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
+
     generation_kwargs = {
         "max_new_tokens": max_new_tokens,
         "do_sample": True,
@@ -37,18 +79,19 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
         "top_p": top_p,
         "top_k": top_k,
         "repetition_penalty": repetition_penalty,
-        "streamer": streamer
+        "streamer": streamer,
+        "return_full_text": False,
     }
-    thread = Thread(target=pipe, args=(messages,), kwargs=generation_kwargs)
+    thread = Thread(target=pipe, args=(prompt_text,), kwargs=generation_kwargs)
     thread.start()
-    # simple formatting without harmony because of no tool usage etc. and experienced hf space problems with harmony
+
     thinking = ""
     final = ""
     started_final = False
     for chunk in streamer:
         if not started_final:
             if "assistantfinal" in chunk.lower():
-                split_parts = re.split(r'assistantfinal', chunk, maxsplit=1)
+                split_parts = re.split(r'(?i)assistantfinal', chunk, maxsplit=1)
                 thinking += split_parts[0]
                 final += split_parts[1]
                 started_final = True
@@ -56,7 +99,7 @@ def generate_response(input_data, chat_history, max_new_tokens, system_prompt, t
                 thinking += chunk
         else:
             final += chunk
-    clean_thinking = re.sub(r'^analysis\s*', '', thinking).strip()
+    clean_thinking = re.sub(r'^analysis\s*', '', thinking, flags=re.I).strip()
     clean_final = final.strip()
     formatted = f"<details open><summary>Click to view Thinking Process</summary>\n\n{clean_thinking}\n\n</details>\n\n{clean_final}"
     yield formatted
@@ -78,8 +121,9 @@ demo = gr.ChatInterface(
     ],
     examples=[
         [{"text": "Explain Newton laws clearly and concisely"}],
-        [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
         [{"text": "What are the benefits of open weight AI models"}],
+        [{"text": "Write a Python function to calculate the Fibonacci sequence"}],
+
     ],
     cache_examples=False,
     type="messages",
@@ -96,4 +140,4 @@ Give it a couple of seconds to start. You can adjust reasoning level in the syst
 )
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch()
```
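
The prompt path added above can be exercised on its own. Below is a minimal, stand-alone sketch using the same openai_harmony calls the commit introduces; the sample system and user messages are made up for illustration, and it stops at printing the rendered token count instead of running the model.

```python
# Minimal sketch of the harmony prompt rendering added in this commit.
# Assumes the openai_harmony package is installed; the messages are hypothetical.
from openai_harmony import (
    load_harmony_encoding,
    HarmonyEncodingName,
    Role,
    Message,
    Conversation,
)

enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)

conversation = Conversation.from_messages([
    Message.from_role_and_content(Role.SYSTEM, "Reasoning: low"),  # hypothetical system prompt
    Message.from_role_and_content(Role.USER, "Explain Newton laws clearly and concisely"),
])

# Token ids for the prompt, rendered so the assistant turn comes next,
# exactly as generate_response() does before decoding them back to text.
prompt_tokens = enc.render_conversation_for_completion(conversation, Role.ASSISTANT)
print(f"{len(prompt_tokens)} prompt tokens")
```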
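
On the output side the commit keeps the "simple formatting" half of its title: the streamer decodes with skip_special_tokens=True, so the harmony channel markers collapse and the decoded stream reads roughly as `analysis <reasoning> assistantfinal <answer>`. Here is a small sketch of that split, factored out of the streaming loop above; the helper name and the sample chunks are mine, not part of the app.

```python
import re

def split_harmony_stream(chunks):
    # Same policy as the streaming loop above: everything before the first
    # "assistantfinal" marker is reasoning, everything after it is the answer.
    thinking, final, started_final = "", "", False
    for chunk in chunks:
        if not started_final and "assistantfinal" in chunk.lower():
            before, after = re.split(r'(?i)assistantfinal', chunk, maxsplit=1)
            thinking += before
            final += after
            started_final = True
        elif started_final:
            final += chunk
        else:
            thinking += chunk
    # Strip the leading "analysis" channel label, as the app does.
    thinking = re.sub(r'^analysis\s*', '', thinking, flags=re.I).strip()
    return thinking, final.strip()

# Hypothetical decoded chunks, shaped like TextIteratorStreamer output:
chunks = ["analysis The user wants Newton's laws", ", keep it brief.assistantfinal", "1. Inertia: ..."]
print(split_harmony_stream(chunks))
```

String splitting works here because skip_special_tokens removes the real channel tokens before the text reaches the loop; a token-level alternative would be harmony's own completion parser, which the earlier revision's removed comment says was avoided after problems running it on the Space.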