Spaces:

pr0ximaCent
/

caption

Sleeping

App Files Files Community

pr0ximaCent committed on May 26

Commit

de58874

verified ·

1 Parent(s): 7034074

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -41

app.py CHANGED Viewed

@@ -6,6 +6,27 @@ from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
 from tensorflow.keras.preprocessing.image import img_to_array
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
 # Custom function to handle attention mechanism
 def attention_function(inputs):
@@ -70,53 +91,111 @@ def get_custom_objects(attention_func, output_shape_func):
 # Multiple loading strategies with different attention mechanisms
 def load_model_safely():
-    attention_strategies = [
-        (attention_function, attention_output_shape),
-        (attention_function_v2, attention_output_shape),
-        (attention_function_v3, attention_output_shape),
-    ]
-    for i, (att_func, shape_func) in enumerate(attention_strategies, 1):
-        try:
-            print(f"Trying attention strategy {i}...")
-            custom_objects = get_custom_objects(att_func, shape_func)
-            model = tf.keras.models.load_model("caption_model.h5", custom_objects=custom_objects)
-            print(f"Model loaded successfully using attention strategy {i}!")
-            return model
-        except Exception as e:
-            print(f"Attention strategy {i} failed: {e}")
-            continue
-    # If all attention strategies fail, try loading without compilation
     try:
-        print("Trying to load without compilation...")
         model = tf.keras.models.load_model("caption_model.h5", compile=False)
-        print("Model loaded without compilation!")
         return model
     except Exception as e:
-        print(f"Loading without compilation failed: {e}")
-    # Last resort: try to load and rebuild the model
     try:
-        print("Attempting to load model weights only...")
-        # This is a more complex approach that would require knowing the model architecture
-        raise Exception("Model architecture reconstruction needed")
-    except:
-        pass
-    raise Exception("All loading strategies failed. The model may need to be retrained or converted.")
 # Load your pre-trained model and tokenizer
-try:
-    model = load_model_safely()
-except Exception as e:
-    print(f"Failed to load model: {e}")
-    print("Creating a dummy model for testing...")
-    # Create a simple dummy model for testing the interface
     model = None
-with open("tokenizer.pkl", "rb") as handle:
-    tokenizer = pickle.load(handle)
 # Image feature extractor model
 feature_extractor = VGG16()
@@ -126,7 +205,10 @@ feature_extractor = tf.keras.Model(feature_extractor.input, feature_extractor.la
 def generate_caption(image):
     try:
         if model is None:
-            return "Model failed to load. Please check the model file."
         # Preprocess the image
         image = image.resize((224, 224))
@@ -135,22 +217,27 @@ def generate_caption(image):
         image = preprocess_input(image)
         # Extract features
         feature = feature_extractor.predict(image, verbose=0)
         # Generate caption
         input_text = 'startseq'
         max_length = 34  # set this to your model's max_length
-        for _ in range(max_length):
             sequence = tokenizer.texts_to_sequences([input_text])[0]
             sequence = pad_sequences([sequence], maxlen=max_length)
             try:
                 yhat = model.predict([feature, sequence], verbose=0)
                 yhat = np.argmax(yhat)
             except Exception as e:
-                print(f"Prediction error: {e}")
-                return f"Error during prediction: {str(e)}"
             word = ''
             for w, i in tokenizer.word_index.items():
@@ -158,15 +245,19 @@ def generate_caption(image):
                     word = w
                     break
             if word == 'endseq' or word == '':
                 break
             input_text += ' ' + word
         caption = input_text.replace('startseq', '').strip()
-        return caption if caption else "Unable to generate caption"
     except Exception as e:
-        return f"Error processing image: {str(e)}"
 # Gradio Interface
 title = "📸 Image Caption Generator"

 from tensorflow.keras.preprocessing.image import img_to_array
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
+import os
+# Check if required files exist
+def check_required_files():  # Report whether both model artifacts exist; returns True only if none are missing.
+    required_files = ["caption_model.h5", "tokenizer.pkl"]  # relative paths, resolved against the current working directory
+    missing_files = []
+    for file in required_files:
+        if not os.path.exists(file):
+            missing_files.append(file)
+        else:
+            size = os.path.getsize(file)
+            print(f"✓ Found {file} ({size} bytes)")  # size is only logged; it is not validated
+    if missing_files:
+        print(f"✗ Missing files: {missing_files}")
+        return False  # at least one required file is absent
+    return True  # every required file was found
+print("Checking required files...")
+files_exist = check_required_files()  # evaluated once at import time; True only when no required file is missing
 # Custom function to handle attention mechanism
 def attention_function(inputs):
 # Multiple loading strategies with different attention mechanisms
 def load_model_safely():  # Try four load strategies in order; return the first model that loads, otherwise raise Exception.
+    print("Starting model loading process...")
+    # Strategy 1: Try with custom Lambda that handles the attention operation
+    try:
+        print("Strategy 1: Loading with custom attention Lambda...")
+        def custom_attention(inputs):
+            """Handle attention mechanism between two inputs"""
+            if len(inputs) == 2:  # NOTE(review): a two-element input that fails the rank check below falls through to the final return
+                attention_weights, features = inputs
+                # Simple attention: multiply attention weights with features
+                # Expand attention weights to match feature dimensions
+                if len(attention_weights.shape) == 3 and len(features.shape) == 3:
+                    attention_weights = tf.expand_dims(attention_weights, axis=-1)  # NOTE(review): rank 3 becomes rank 4 here while features stay rank 3 - confirm the broadcast is intended
+                    return tf.multiply(attention_weights, features)
+            return inputs[0] if isinstance(inputs, list) else inputs  # fallback: first element of a list input, otherwise the input unchanged
+        custom_objects = {
+            'Lambda': lambda function=None, output_shape=None, **kwargs:  # factory standing in for the Lambda class; the output_shape argument it receives is ignored
+                tf.keras.layers.Lambda(
+                    custom_attention if function is None else function,  # custom_attention is used only when no function is supplied
+                    output_shape=lambda input_shape: input_shape[1] if isinstance(input_shape, list) else input_shape,  # second entry of a list of shapes, otherwise the shape unchanged
+                    **kwargs
+                )
+        }
+        model = tf.keras.models.load_model("caption_model.h5", custom_objects=custom_objects)
+        print("✓ Strategy 1 successful!")
+        return model
+    except Exception as e:  # any failure is logged and the next strategy is attempted
+        print(f"✗ Strategy 1 failed: {str(e)[:200]}...")  # error text is truncated to 200 characters
+    # Strategy 2: Load with compile=False (nothing in this function recompiles the model afterwards)
     try:
+        print("Strategy 2: Loading without compilation...")
         model = tf.keras.models.load_model("caption_model.h5", compile=False)
+        print("✓ Strategy 2 successful!")
+        return model
+    except Exception as e:
+        print(f"✗ Strategy 2 failed: {str(e)[:200]}...")
+    # Strategy 3: Override Lambda with a factory whose default function is identity_function
+    try:
+        print("Strategy 3: Loading with default Lambda handling...")
+        def identity_function(x):
+            if isinstance(x, list) and len(x) == 2:
+                # For attention mechanism, return the second input (features)
+                return x[1]
+            return x  # anything other than a two-element list passes through unchanged
+        custom_objects = {
+            'Lambda': lambda function=identity_function, output_shape=None, **kwargs:  # as in Strategy 1, the output_shape argument it receives is ignored
+                tf.keras.layers.Lambda(
+                    function,
+                    output_shape=lambda input_shape: input_shape[1] if isinstance(input_shape, list) else input_shape,
+                    **kwargs
+                )
+        }
+        model = tf.keras.models.load_model("caption_model.h5", custom_objects=custom_objects)
+        print("✓ Strategy 3 successful!")
         return model
     except Exception as e:
+        print(f"✗ Strategy 3 failed: {str(e)[:200]}...")
+    # Strategy 4: Try with minimal custom objects
     try:
+        print("Strategy 4: Loading with minimal custom objects...")
+        model = tf.keras.models.load_model("caption_model.h5", custom_objects={'Lambda': tf.keras.layers.Lambda})  # NOTE(review): maps 'Lambda' to the stock layer class - presumably equivalent to passing no custom objects; confirm
+        print("✓ Strategy 4 successful!")
+        return model
+    except Exception as e:
+        print(f"✗ Strategy 4 failed: {str(e)[:200]}...")
+    print("All strategies failed. Model could not be loaded.")
+    raise Exception("All model loading strategies failed. The model file may be corrupted or incompatible.")  # generic Exception: callers must catch Exception itself
 # Load your pre-trained model and tokenizer
+if not files_exist:  # a required file is missing: skip loading and leave both globals as None
+    print("Cannot proceed without required files.")
     model = None
+    tokenizer = None
+else:
+    # Load tokenizer first
+    try:
+        with open("tokenizer.pkl", "rb") as handle:
+            tokenizer = pickle.load(handle)  # NOTE: unpickling can execute arbitrary code; only ship a trusted tokenizer.pkl
+        print("✓ Tokenizer loaded successfully")
+    except Exception as e:
+        print(f"✗ Failed to load tokenizer: {e}")
+        tokenizer = None  # failure is recorded as None instead of aborting startup
+    # Load model
+    try:
+        model = load_model_safely()  # attempted even when the tokenizer failed to load
+        print("✓ Model loaded successfully and ready for inference!")
+    except Exception as e:
+        print(f"✗ Failed to load model: {e}")
+        print("The app will not work without a properly loaded model.")
+        model = None  # failure is recorded as None instead of aborting startup
 # Image feature extractor model
 feature_extractor = VGG16()
 def generate_caption(image):
     try:
         if model is None:
+            return "❌ Model failed to load. Please check the model file and console output for details."
+        if tokenizer is None:
+            return "❌ Tokenizer failed to load. Please check the tokenizer.pkl file."
         # Preprocess the image
         image = image.resize((224, 224))
         image = preprocess_input(image)
         # Extract features
+        print("Extracting image features...")
         feature = feature_extractor.predict(image, verbose=0)
+        print(f"Features extracted, shape: {feature.shape}")
         # Generate caption
         input_text = 'startseq'
         max_length = 34  # set this to your model's max_length
+        print("Starting caption generation...")
+        for i in range(max_length):
             sequence = tokenizer.texts_to_sequences([input_text])[0]
             sequence = pad_sequences([sequence], maxlen=max_length)
             try:
+                print(f"Prediction step {i+1}: input_text = '{input_text}'")
                 yhat = model.predict([feature, sequence], verbose=0)
                 yhat = np.argmax(yhat)
+                print(f"Predicted token index: {yhat}")
             except Exception as e:
+                print(f"Prediction error at step {i+1}: {e}")
+                return f"❌ Error during prediction: {str(e)}"
             word = ''
             for w, i in tokenizer.word_index.items():
                     word = w
                     break
+            print(f"Predicted word: '{word}'")
             if word == 'endseq' or word == '':
                 break
             input_text += ' ' + word
         caption = input_text.replace('startseq', '').strip()
+        print(f"Final caption: '{caption}'")
+        return f"✅ {caption}" if caption else "❌ Unable to generate caption"
     except Exception as e:
+        error_msg = f"❌ Error processing image: {str(e)}"
+        print(error_msg)
+        return error_msg
 # Gradio Interface
 title = "📸 Image Caption Generator"