Spaces:

rdesai2
/

LoRACaptioner

Running

Rishi Desai committed on May 3

Commit

89d29bd

1 Parent(s): 9e81a2f

removing outfit cmd line arg

Files changed (3) hide show

demo.py CHANGED Viewed

@@ -55,7 +55,7 @@ def process_uploaded_images(image_paths, batch_by_category=False):
             path_mapping[str(temp_path)] = str(path)
         # Process the images using main.py's function
-        process_images(temp_input_dir, temp_output_dir, fix_outfit=False, batch_images=batch_by_category)
         # Collect the captions from the output directory
         captions = []

             path_mapping[str(temp_path)] = str(path)
         # Process the images using main.py's function
+        process_images(temp_input_dir, temp_output_dir, batch_images=batch_by_category)
         # Collect the captions from the output directory
         captions = []

main.py CHANGED Viewed

@@ -108,7 +108,7 @@ def process_all_at_once(images_by_category, image_paths_by_category, input_path,
     return processed_count
-def process_images(input_dir, output_dir, fix_outfit=False, batch_images=False):
     """Process all images in the input directory and generate captions."""
     input_path = Path(input_dir)
     output_path = Path(output_dir) if output_dir else input_path
@@ -159,7 +159,6 @@ def main():
     parser = argparse.ArgumentParser(description='Generate captions for images using GPT-4o.')
     parser.add_argument('--input', type=str, required=True, help='Directory containing images')
     parser.add_argument('--output', type=str, help='Directory to save images and captions (defaults to input directory)')
-    parser.add_argument('--fix_outfit', action='store_true', help='Flag to indicate if character has one outfit')
     parser.add_argument('--batch_images', action='store_true', help='Flag to indicate if images should be processed in batches')
     args = parser.parse_args()
@@ -168,7 +167,7 @@ def main():
         print(f"Error: Input directory '{args.input}' does not exist.")
         return
-    process_images(args.input, args.output, args.fix_outfit, args.batch_images)
 if __name__ == "__main__":

     return processed_count
+def process_images(input_dir, output_dir, batch_images=False):
     """Process all images in the input directory and generate captions."""
     input_path = Path(input_dir)
     output_path = Path(output_dir) if output_dir else input_path
     parser = argparse.ArgumentParser(description='Generate captions for images using GPT-4o.')
     parser.add_argument('--input', type=str, required=True, help='Directory containing images')
     parser.add_argument('--output', type=str, help='Directory to save images and captions (defaults to input directory)')
     parser.add_argument('--batch_images', action='store_true', help='Flag to indicate if images should be processed in batches')
     args = parser.parse_args()
         print(f"Error: Input directory '{args.input}' does not exist.")
         return
+    process_images(args.input, args.output, args.batch_images)
 if __name__ == "__main__":

system_prompt.txt CHANGED Viewed

@@ -19,7 +19,7 @@ Avoid Describing These Unless Variable Across Dataset or Uncertain from Concept:
 - Known accessories that always appear (unless outfit-specific)
 Updated Caption Format:
-tr1gger [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
 Captioning Principles:
 - Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
@@ -29,10 +29,10 @@ Captioning Principles:
 - Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
 - Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
 - Avoid mentioning real or fictional identities.
-- Always prefix with the trigger word "tr1gger."
 Updated Examples:
-- tr1gger photorealistic, combat gear, tactical vest and gloves, standing in profile, neutral, empty room, overcast lighting, side profile
-- tr1gger 3D-rendered, digital patterns, hooded cloak, seated cross-legged, calm, meditation chamber, low ambient lighting, front view
-- tr1gger anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, classroom, soft daylight, three-quarter view
-- tr1gger photorealistic, long trench coat and combat boots, walking, determined, rain-soaked street, dramatic shadows, low-angle view

 - Known accessories that always appear (unless outfit-specific)
 Updated Caption Format:
+tr1gg3r [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
 Captioning Principles:
 - Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
 - Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
 - Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
 - Avoid mentioning real or fictional identities.
+- Always prefix with the trigger word "tr1gg3r."
 Updated Examples:
+- tr1gg3r photorealistic, combat gear, tactical vest and gloves, standing in profile, neutral, empty room, overcast lighting, side profile
+- tr1gg3r 3D-rendered, digital patterns, hooded cloak, seated cross-legged, calm, meditation chamber, low ambient lighting, front view
+- tr1gg3r anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, classroom, soft daylight, three-quarter view
+- tr1gg3r photorealistic, long trench coat and combat boots, walking, determined, rain-soaked street, dramatic shadows, low-angle view