ccclllwww committed
Commit 5bd24ba · verified · 1 Parent(s): 7463f5d

Update app.py

Files changed (1)
  1. app.py +8 -76
app.py CHANGED
@@ -16,23 +16,13 @@ import torch
 # ======================================
 
 # Initialize image captioning pipeline with pretrained model
-# Model source: Hugging Face Model Hub
 _image_caption_pipeline = pipeline(
     task="image-to-text",
     model="noamrot/FuseCap_Image_Captioning"
 )
 
 # Global model configuration constants
-_MODEL_NAME = "Qwen/Qwen3-1.7B"
-_THINKING_TOKEN_ID = 151668  # Special token marking thinking/content separation
-
-# Initialize model components once
-_tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
-_model = AutoModelForCausalLM.from_pretrained(
-    _MODEL_NAME,
-    torch_dtype="auto",
-    device_map="auto"
-)
+_text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen1.5-0.5B", max_new_tokens=100)
 
 # Initialize TTS components once to avoid reloading
 _SPEECH_PIPELINE = pipeline("text-to-speech", model="microsoft/speecht5_tts")
@@ -95,76 +85,19 @@ def generate_story_content(system_prompt: str, user_prompt: str) -> str:
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": user_prompt}
         ]
-
-        # Format input using model-specific template
-        formatted_input = _tokenizer.apply_chat_template(
-            conversation_history,
-            tokenize=False,
-            add_generation_prompt=True,
-            enable_thinking=False
-        )
-
-        # Tokenize and prepare model inputs
-        model_inputs = _tokenizer(
-            [formatted_input],
-            return_tensors="pt"
-        ).to(_model.device)
-
-        # Generate text completion
-        generated_sequences = _model.generate(
-            **model_inputs,
-            max_new_tokens=150
-        )
+
+        # Generate the story
+        story = _text_generation_pipeline(conversation_history)
+
+        # Extract the story result
+        story_result = story[0]["generated_text"][2]["content"]
 
         # Process and clean output
-        return _process_generated_output(
-            generated_sequences,
-            model_inputs.input_ids
-        )
+        return story_result
 
     except Exception as error:
         raise RuntimeError(f"Story generation failed: {str(error)}") from error
 
-def _process_generated_output(generated_sequences: list, input_ids: list) -> str:
-    """
-    Processes raw model output to extract final content.
-
-    Args:
-        generated_sequences: Raw output sequences from model generation
-        input_ids: Original input token IDs used for generation
-
-    Returns:
-        Cleaned final content text
-    """
-    # Extract new tokens excluding original prompt
-    new_tokens = generated_sequences[0][len(input_ids[0]):].tolist()
-
-    # Find separation point between thinking and final content
-    separation_index = _find_thinking_separation(new_tokens)
-
-    # Decode and clean final content
-    return _tokenizer.decode(
-        new_tokens[separation_index:],
-        skip_special_tokens=True
-    ).strip("\n")
-
-def _find_thinking_separation(token_sequence: list) -> int:
-    """
-    Locates the boundary between thinking process and final content.
-
-    Args:
-        token_sequence: List of generated token IDs
-
-    Returns:
-        Index position marking the start of final content
-    """
-    try:
-        # Search from end for separation token
-        reverse_position = token_sequence[::-1].index(_THINKING_TOKEN_ID)
-        return len(token_sequence) - reverse_position
-    except ValueError:
-        return 0  # Return start if token not found
-
 def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
     """
     Convert text story to speech audio file using text-to-speech synthesis.
@@ -238,7 +171,6 @@ st.markdown("""
     margin: 20px 0;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
 }
-
 /* Image caption styling */
 .image-caption {
     border-left: 4px solid #4CAF50;
 
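Why the new extraction line indexes [2]: when a transformers "text-generation" pipeline is called with a list of chat messages, it applies the model's chat template and, by default, returns the full conversation with the generated assistant turn appended to the input messages. With a two-message prompt (system at index 0, user at index 1), the assistant reply lands at index 2. The sketch below illustrates that shape; the prompt strings are placeholders for illustration, not the ones app.py actually passes in, and it assumes the model's tokenizer ships a chat template.

from transformers import pipeline

# Same pipeline configuration as the commit.
_text_generation_pipeline = pipeline(
    "text-generation",
    model="Qwen/Qwen1.5-0.5B",
    max_new_tokens=100,
)

# Placeholder prompts, for illustration only.
conversation_history = [
    {"role": "system", "content": "You are a storyteller for children."},
    {"role": "user", "content": "Tell a short story about a lighthouse."},
]

story = _text_generation_pipeline(conversation_history)
# story[0]["generated_text"] is the message list:
#   [0] system prompt, [1] user prompt, [2] the generated assistant reply
story_result = story[0]["generated_text"][2]["content"]
print(story_result)

A slightly more defensive variant would read story[0]["generated_text"][-1]["content"], which keeps working if the number of input messages ever changes.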