venkatviswa commited on
Commit
266fbb3
·
verified ·
1 Parent(s): a75625c

updates to LLM

Browse files

Phi-4-mini
added updated prompt

Files changed (1) hide show
  1. summarizer_module/__init__.py +28 -16
summarizer_module/__init__.py CHANGED
@@ -6,33 +6,45 @@ import torch
6
 
7
  device = get_device()
8
 
9
- # Use a small local model (e.g., Phi-2)
10
- MODEL_ID = "microsoft/phi-2" # Ensure it's downloaded and cached locally
11
 
12
  # Load model and tokenizer
13
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)
 
 
 
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
15
  summarizer = pipeline("text-generation", model=model, tokenizer=tokenizer)
16
 
17
  def summarize_flowchart(flowchart_json):
18
  """
19
- Given a flowchart JSON with 'start' and 'steps', returns a plain English explanation
20
- formatted as bullets and sub-bullets.
21
 
22
  Args:
23
- flowchart_json (dict): Structured representation of flowchart
24
 
25
  Returns:
26
- str: Bullet-style natural language summary of the logic
27
  """
 
28
  prompt = (
29
- "Turn the following flowchart into a bullet-point explanation in plain English.\n"
30
- "Use bullets for steps and sub-bullets for branches.\n"
31
- "\n"
32
- f"Flowchart JSON:\n{flowchart_json}\n"
33
- "\nExplanation:"
34
- )
35
-
36
- result = summarizer(prompt, max_new_tokens=300, do_sample=False)[0]["generated_text"]
37
- explanation = result.split("Explanation:")[-1].strip()
 
 
 
 
 
 
 
38
  return explanation
 
# Pick the best available device (helper defined elsewhere in this package).
device = get_device()

# Model config: Phi-4-mini instruct model.
MODEL_ID = "microsoft/Phi-4-mini-instruct"  # Ensure it's downloaded and cached locally

# Load model and tokenizer.
# NOTE(review): device_map="auto" lets accelerate dispatch the weights across
# available devices itself; chaining .to(device) on such a model is redundant
# and raises on accelerate-dispatched models in recent transformers versions,
# so the explicit move has been dropped.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,  # half precision to reduce memory footprint
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
summarizer = pipeline("text-generation", model=model, tokenizer=tokenizer)
def summarize_flowchart(flowchart_json):
    """Generate a human-friendly bullet-point explanation from flowchart JSON.

    Args:
        flowchart_json (dict): Structured flowchart representation; presumably
            contains a "start" node and a list of "steps" — confirm against
            the caller that builds it.

    Returns:
        str: Bullet-style explanation with indented sub-bullets for
        decision branches.
    """
    # Local import: the top-of-file import block is not visible from this
    # change, and the diff adds json.dumps without visibly adding an import —
    # importing here guarantees the name is in scope either way.
    import json

    # Prompt optimized for flow comprehension.
    prompt = (
        "You are an expert in visual reasoning and instruction generation.\n"
        "Convert the following flowchart JSON into a clear, step-by-step summary using bullets.\n"
        "- Each bullet represents a process step.\n"
        "- Use indented sub-bullets to explain decision branches (Yes/No).\n"
        "- Maintain order based on dependencies and parent-child links.\n"
        "- Avoid repeating the same step more than once.\n"
        "- Do not include JSON in the output, only human-readable text.\n"
        "\nFlowchart:\n{flowchart}\n\nBullet Explanation:"
    ).format(flowchart=json.dumps(flowchart_json, indent=2))

    # Greedy decoding (do_sample=False) keeps the output deterministic.
    result = summarizer(prompt, max_new_tokens=400, do_sample=False)[0]["generated_text"]

    # The text-generation pipeline echoes the prompt; keep only the text
    # after the final marker. Fall back to the full output if the model
    # did not reproduce the marker.
    marker = "Bullet Explanation:"
    if marker in result:
        explanation = result.split(marker)[-1].strip()
    else:
        explanation = result.strip()
    return explanation