dodo13114arch committed on
Commit
6d2264b
·
verified ·
1 Parent(s): 62acd30

v2.0: add new OpenAI models

Browse files
Files changed (1) hide show
  1. mistralocr_app_demo.py +22 -10
mistralocr_app_demo.py CHANGED
@@ -400,8 +400,8 @@ Return ONLY the JSON object, without any surrounding text or markdown formatting
400
  # Fallback: return the raw text wrapped in a basic JSON structure
401
  pretty_text = json.dumps({"error": "Failed to parse Gemini JSON response", "raw_output": raw_json_text}, indent=2, ensure_ascii=False)
402
 
403
- elif structure_model == "gpt-4o-mini":
404
- print(f" - Using OpenAI GPT-4o mini...")
405
  if not openai_client:
406
  print(" - ⚠️ OpenAI client not initialized. Skipping.")
407
  return json.dumps({"error": "OpenAI client not initialized. Check API key and library installation."}, indent=2, ensure_ascii=False)
@@ -1101,9 +1101,17 @@ def create_gradio_interface():
1101
  )
1102
  structure_model = gr.Dropdown(
1103
  label="結構化模型 (用於圖片 OCR)",
1104
- choices=["pixtral-12b-latest", "gemini-2.0-flash", "gpt-4o-mini", "gpt-4o"], # Added gpt-4o
 
 
 
 
 
 
 
 
1105
  value="gemini-2.0-flash",
1106
- info="選擇用於結構化圖片 OCR 結果的模型。需要對應的 API Key。"
1107
  )
1108
  structure_text_only = gr.Checkbox(
1109
  label="僅用文字進行結構化 (節省 Token)",
@@ -1113,13 +1121,17 @@ def create_gradio_interface():
1113
  translation_model = gr.Dropdown(
1114
  label="翻譯模型",
1115
  choices=[
1116
- "gemini-2.0-flash",
1117
- "gemini-2.5-pro-exp-03-25",
1118
- "gemini-2.0-flash-lite",
1119
- "gpt-4o", # Added OpenAI models
1120
- "gpt-4o-mini"
 
 
 
1121
  ],
1122
- value="gemini-2.0-flash"
 
1123
  )
1124
  with gr.Accordion("進階設定", open=False):
1125
  translation_system_prompt = gr.Textbox(
 
400
  # Fallback: return the raw text wrapped in a basic JSON structure
401
  pretty_text = json.dumps({"error": "Failed to parse Gemini JSON response", "raw_output": raw_json_text}, indent=2, ensure_ascii=False)
402
 
403
+ elif structure_model.startswith("gpt-"):
404
+ print(f" - Using OpenAI model: {structure_model}...")
405
  if not openai_client:
406
  print(" - ⚠️ OpenAI client not initialized. Skipping.")
407
  return json.dumps({"error": "OpenAI client not initialized. Check API key and library installation."}, indent=2, ensure_ascii=False)
 
1101
  )
1102
  structure_model = gr.Dropdown(
1103
  label="結構化模型 (用於圖片 OCR)",
1104
+ choices=[
1105
+ ("pixtral-12b-latest (Recommend)", "pixtral-12b-latest"),
1106
+ ("gemini-2.0-flash (Recommend)", "gemini-2.0-flash"),
1107
+ ("gpt-4o-mini", "gpt-4o-mini"),
1108
+ ("gpt-4o", "gpt-4o"),
1109
+ ("gpt-4.1-nano (Not Recommend)", "gpt-4.1-nano"),
1110
+ ("gpt-4.1-mini", "gpt-4.1-mini"),
1111
+ ("gpt-4.1", "gpt-4.1")
1112
+ ],
1113
  value="gemini-2.0-flash",
1114
+ info="選擇用於結構化圖片 OCR 結果的模型。需要對應的 API Key。"
1115
  )
1116
  structure_text_only = gr.Checkbox(
1117
  label="僅用文字進行結構化 (節省 Token)",
 
1121
  translation_model = gr.Dropdown(
1122
  label="翻譯模型",
1123
  choices=[
1124
+ ("gemini-2.0-flash (Recommend)", "gemini-2.0-flash"),
1125
+ ("gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-exp-03-25"),
1126
+ ("gemini-2.0-flash-lite", "gemini-2.0-flash-lite"),
1127
+ ("gpt-4o", "gpt-4o"),
1128
+ ("gpt-4o-mini", "gpt-4o-mini"),
1129
+ ("gpt-4.1-nano (Not Recommend)", "gpt-4.1-nano"),
1130
+ ("gpt-4.1-mini", "gpt-4.1-mini"),
1131
+ ("gpt-4.1", "gpt-4.1")
1132
  ],
1133
+ value="gemini-2.0-flash",
1134
+ info="選擇用於翻譯的模型。需要對應的 API Key。"
1135
  )
1136
  with gr.Accordion("進階設定", open=False):
1137
  translation_system_prompt = gr.Textbox(