Girinath11 committed
Commit 87ce049 · verified · 1 Parent(s): f1bdb4d

Upload 7 files

Files changed (7)
  1. app.py +25 -0
  2. config.yml +7 -0
  3. executor.py +67 -0
  4. manifest.yml +8 -0
  5. model_wrapper.py +38 -0
  6. readme.md +20 -0
  7. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,25 @@
+ import gradio as gr
+ from model_wrapper import CodeDebuggerWrapper
+
+ # instantiate once (will download the model)
+ debugger = CodeDebuggerWrapper()
+
+ def debug_code(code: str):
+     if not code or not code.strip():
+         return "❌ Please paste some code to debug."
+     try:
+         return debugger.debug(code)
+     except Exception as e:
+         # friendly error message
+         return f"Error during model inference:\n{e}"
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🐞 AI Code Debugger (Fine-tuned)")
+     gr.Markdown("Paste Python code below and click **Debug Code**. Uses your fine-tuned HF model.")
+     code_input = gr.Textbox(lines=14, placeholder="Paste Python code here...", label="Input Code")
+     output = gr.Textbox(lines=14, label="Debugged Code / Output")
+     btn = gr.Button("Debug Code")
+     btn.click(fn=debug_code, inputs=code_input, outputs=output)
+
+ if __name__ == "__main__":
+     demo.launch(share=True)
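A quick way to sanity-check the callback without opening the UI is to call `debug_code` directly. A minimal sketch, assuming the model weights download successfully when `app` is imported (importing `app` instantiates `CodeDebuggerWrapper` at module level):

```python
# Hypothetical smoke test for app.py's callback.
from app import debug_code

# A deliberate bug the fine-tuned model should flag or fix.
buggy = "def add(a, b):\n    return a - b"
print(debug_code(buggy))  # prints the model's suggested fix
print(debug_code(""))     # prints the friendly empty-input message
```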
config.yml ADDED
@@ -0,0 +1,7 @@
+ jtype: CodeDebugger
+ py_modules:
+   - executor.py
+ metas:
+   name: code-debugger
+   description: "Executor that debugs AI/ML code using a fine-tuned model."
+   version: 0.1.0
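The `jtype`/`py_modules` pair is what lets Jina build the `CodeDebugger` class from this file. A minimal serving sketch, assuming Jina 3.x's `Deployment` API and this `config.yml` in the working directory (the port is an arbitrary choice):

```python
# Sketch: serve the Executor described by config.yml over gRPC.
from jina import Deployment

dep = Deployment(uses="config.yml", port=12345)

with dep:
    dep.block()  # serve until interrupted
```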
executor.py ADDED
@@ -0,0 +1,67 @@
+ import threading
+ import os
+ from jina import Executor, requests
+ from docarray import BaseDoc, DocList
+
+ # transformers imports are done lazily in _ensure_model to prevent heavy imports on module load
+ class CodeInput(BaseDoc):
+     code: str
+
+ class CodeOutput(BaseDoc):
+     result: str
+
+ class CodeDebugger(Executor):
+     """
+     Jina Executor that lazy-loads a Hugging Face seq2seq model on first request.
+     Set the environment variable JINA_SKIP_MODEL_LOAD=1 to skip model loading (useful in CI/builds).
+     """
+     def __init__(self, model_name: str = "Girinath11/aiml_code_debug_model", **kwargs):
+         super().__init__(**kwargs)
+         self.model_name = model_name
+         self._lock = threading.Lock()
+         self.tokenizer = None
+         self.model = None
+         # optional: allow overriding max_new_tokens via env var
+         self.max_new_tokens = int(os.environ.get("MAX_NEW_TOKENS", "256"))
+
+     def _ensure_model(self):
+         """
+         Load tokenizer & model once in a thread-safe manner.
+         If JINA_SKIP_MODEL_LOAD is set to "1", skip loading (helpful for hub builds).
+         """
+         skip = os.environ.get("JINA_SKIP_MODEL_LOAD", "0") == "1"
+         if skip:
+             self.logger.warning("JINA_SKIP_MODEL_LOAD=1 set; skipping HF model load.")
+             return
+
+         if self.model is None or self.tokenizer is None:
+             from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # lazy import
+             with self._lock:
+                 if self.model is None or self.tokenizer is None:
+                     self.logger.info(f"Loading model {self.model_name} ...")
+                     # if you have run huggingface-cli login or set HF_TOKEN, transformers picks up the credential automatically
+                     self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+                     self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+                     self.logger.info("Model loaded successfully.")
+
+     @requests
+     def debug(self, docs: DocList[CodeInput], **kwargs) -> DocList[CodeOutput]:
+         # lazy-load the model at request time
+         self._ensure_model()
+
+         results = []
+         if self.model is None or self.tokenizer is None:
+             # if loading was skipped, return a helpful message
+             for _ in docs:
+                 results.append(CodeOutput(result="Model not loaded (JINA_SKIP_MODEL_LOAD=1)."))
+             return DocList[CodeOutput](results)
+
+         for doc in docs:
+             # make sure the input is a string
+             code_text = doc.code if isinstance(doc.code, str) else str(doc.code)
+             inputs = self.tokenizer(code_text, return_tensors="pt", padding=True, truncation=True)
+             outputs = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)
+             result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+             results.append(CodeOutput(result=result))
+
+         return DocList[CodeOutput](results)
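Once the Executor is serving, requests carry `CodeInput` docs in and `CodeOutput` docs back out. A hedged client sketch, assuming the `Deployment` from the earlier sketch is listening on port 12345:

```python
# Sketch: call the running CodeDebugger over gRPC with Jina's Client.
from jina import Client
from docarray import DocList
from executor import CodeInput, CodeOutput

client = Client(port=12345)  # port must match the Deployment
docs = DocList[CodeInput]([CodeInput(code="print(undefined_var)")])
results = client.post("/", inputs=docs, return_type=DocList[CodeOutput])
print(results[0].result)  # the model's debugged output
```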
manifest.yml ADDED
@@ -0,0 +1,8 @@
+ name: girinath11/code-debugger
+ description: Debug Python code using a fine-tuned Hugging Face model.
+ keywords: [code, debug, huggingface, aiml, transformer, T5, LLM, bug-fix, python]
+ tags: [bug-fix, python, transformer, code-debugger]
+ version: 0.1.0
+ license: apache-2.0
+ url: https://github.com/Giri530/jina-code-debugger
+ author: Girinath V <girinathv48@gmail.com>
model_wrapper.py ADDED
@@ -0,0 +1,38 @@
+ import os
+ import threading
+
+ class CodeDebuggerWrapper:
+     """
+     Simple wrapper that loads the same HF model and exposes debug(code: str) -> str.
+     This is used by app.py (Gradio).
+     """
+     def __init__(self, model_name: str = "Girinath11/aiml_code_debug_model"):
+         self.model_name = model_name
+         self._lock = threading.Lock()
+         self.tokenizer = None
+         self.model = None
+         self.max_new_tokens = int(os.environ.get("MAX_NEW_TOKENS", "256"))
+         self._ensure_model()
+
+     def _ensure_model(self):
+         # allow skipping in environments where you don't want to download weights
+         skip = os.environ.get("SKIP_MODEL_LOAD", "0") == "1"
+         if skip:
+             print("SKIP_MODEL_LOAD=1 -> not loading model.")
+             return
+
+         if self.model is None or self.tokenizer is None:
+             from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+             with self._lock:
+                 if self.model is None or self.tokenizer is None:
+                     print(f"Loading model {self.model_name} ...")
+                     self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+                     self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+                     print("Model loaded.")
+
+     def debug(self, code: str) -> str:
+         if self.model is None or self.tokenizer is None:
+             return "Model not loaded. Set SKIP_MODEL_LOAD=0 and ensure an HF token is available if the model is private."
+         inputs = self.tokenizer(code, return_tensors="pt", padding=True, truncation=True)
+         outputs = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)
+         return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
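The wrapper is synchronous and self-contained, so it can be exercised from a REPL. A minimal sketch (the first instantiation downloads the weights unless `SKIP_MODEL_LOAD=1` is set; the buggy snippet is an arbitrary example):

```python
from model_wrapper import CodeDebuggerWrapper

debugger = CodeDebuggerWrapper()  # loads tokenizer + model eagerly in __init__
# missing colon after range(10); the model should repair it
print(debugger.debug("for i in range(10) print(i)"))
```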
readme.md ADDED
@@ -0,0 +1,20 @@
+ # 🐞 AI Code Debugger – Executor + Gradio App
+
+ This repository contains:
+ - `executor.py` – a **Jina Executor** (lazy-loads your HF model).
+ - `model_wrapper.py` – a simple wrapper used by the Gradio app.
+ - `app.py` – a Gradio UI (suitable for Hugging Face Spaces or a local run).
+ - `manifest.yml` & `config.yml` – metadata for Jina Hub.
+ - `requirements.txt` – dependencies.
+
+ ## Model
+ This uses your fine-tuned model
+ `Girinath11/aiml_code_debug_model` on Hugging Face.
+ **If the model is private**, set `HF_TOKEN` or run `huggingface-cli login` on the host to allow downloads.
+
+ ---
+
+ ## Run locally (quick)
+ 1. Install deps:
+ ```bash
+ pip install -r requirements.txt
+ ```
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ # Core
+ transformers>=4.41.1
+ torch
+
+ # Jina executor / testing
+ jina>=3.21.1
+ docarray>=0.38.0
+
+ # Web UI (Gradio) & HF CLI
+ gradio
+ huggingface_hub
+
+ # Optional: for nicer logs in some environments
+ tqdm