Spaces:
Running
Running
Rishi Desai
committed on
Commit
·
89d29bd
1
Parent(s):
9e81a2f
removing outfit cmd line arg
Browse files
- demo.py +1 -1
- main.py +2 -3
- system_prompt.txt +6 -6
demo.py
CHANGED
@@ -55,7 +55,7 @@ def process_uploaded_images(image_paths, batch_by_category=False):
|
|
55 |
path_mapping[str(temp_path)] = str(path)
|
56 |
|
57 |
# Process the images using main.py's function
|
58 |
-
process_images(temp_input_dir, temp_output_dir,
|
59 |
|
60 |
# Collect the captions from the output directory
|
61 |
captions = []
|
|
|
55 |
path_mapping[str(temp_path)] = str(path)
|
56 |
|
57 |
# Process the images using main.py's function
|
58 |
+
process_images(temp_input_dir, temp_output_dir, batch_images=batch_by_category)
|
59 |
|
60 |
# Collect the captions from the output directory
|
61 |
captions = []
|
main.py
CHANGED
@@ -108,7 +108,7 @@ def process_all_at_once(images_by_category, image_paths_by_category, input_path,
|
|
108 |
return processed_count
|
109 |
|
110 |
|
111 |
-
def process_images(input_dir, output_dir,
|
112 |
"""Process all images in the input directory and generate captions."""
|
113 |
input_path = Path(input_dir)
|
114 |
output_path = Path(output_dir) if output_dir else input_path
|
@@ -159,7 +159,6 @@ def main():
|
|
159 |
parser = argparse.ArgumentParser(description='Generate captions for images using GPT-4o.')
|
160 |
parser.add_argument('--input', type=str, required=True, help='Directory containing images')
|
161 |
parser.add_argument('--output', type=str, help='Directory to save images and captions (defaults to input directory)')
|
162 |
-
parser.add_argument('--fix_outfit', action='store_true', help='Flag to indicate if character has one outfit')
|
163 |
parser.add_argument('--batch_images', action='store_true', help='Flag to indicate if images should be processed in batches')
|
164 |
|
165 |
args = parser.parse_args()
|
@@ -168,7 +167,7 @@ def main():
|
|
168 |
print(f"Error: Input directory '{args.input}' does not exist.")
|
169 |
return
|
170 |
|
171 |
-
process_images(args.input, args.output, args.
|
172 |
|
173 |
|
174 |
if __name__ == "__main__":
|
|
|
108 |
return processed_count
|
109 |
|
110 |
|
111 |
+
def process_images(input_dir, output_dir, batch_images=False):
|
112 |
"""Process all images in the input directory and generate captions."""
|
113 |
input_path = Path(input_dir)
|
114 |
output_path = Path(output_dir) if output_dir else input_path
|
|
|
159 |
parser = argparse.ArgumentParser(description='Generate captions for images using GPT-4o.')
|
160 |
parser.add_argument('--input', type=str, required=True, help='Directory containing images')
|
161 |
parser.add_argument('--output', type=str, help='Directory to save images and captions (defaults to input directory)')
|
|
|
162 |
parser.add_argument('--batch_images', action='store_true', help='Flag to indicate if images should be processed in batches')
|
163 |
|
164 |
args = parser.parse_args()
|
|
|
167 |
print(f"Error: Input directory '{args.input}' does not exist.")
|
168 |
return
|
169 |
|
170 |
+
process_images(args.input, args.output, args.batch_images)
|
171 |
|
172 |
|
173 |
if __name__ == "__main__":
|
system_prompt.txt
CHANGED
@@ -19,7 +19,7 @@ Avoid Describing These Unless Variable Across Dataset or Uncertain from Concept:
|
|
19 |
- Known accessories that always appear (unless outfit-specific)
|
20 |
|
21 |
Updated Caption Format:
|
22 |
-
|
23 |
|
24 |
Captioning Principles:
|
25 |
- Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
|
@@ -29,10 +29,10 @@ Captioning Principles:
|
|
29 |
- Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
|
30 |
- Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
|
31 |
- Avoid mentioning real or fictional identities.
|
32 |
-
- Always prefix with the trigger word "
|
33 |
|
34 |
Updated Examples:
|
35 |
-
-
|
36 |
-
-
|
37 |
-
-
|
38 |
-
-
|
|
|
19 |
- Known accessories that always appear (unless outfit-specific)
|
20 |
|
21 |
Updated Caption Format:
|
22 |
+
tr1gg3r [Style], [Notable Visual Features], [Clothing], [Pose], [Expression/Mood], [Background/Setting], [Lighting], [Camera Angle]
|
23 |
|
24 |
Captioning Principles:
|
25 |
- Emphasize visual variation and context-specific details (outfit, pose, lighting, expression, camera angle).
|
|
|
29 |
- Specify lighting conditions (soft lighting, harsh shadows, glowing backlight).
|
30 |
- Explicitly state camera angle (e.g., front view, right side profile, low-angle, high-angle, overhead).
|
31 |
- Avoid mentioning real or fictional identities.
|
32 |
+
- Always prefix with the trigger word "tr1gg3r."
|
33 |
|
34 |
Updated Examples:
|
35 |
+
- tr1gg3r photorealistic, combat gear, tactical vest and gloves, standing in profile, neutral, empty room, overcast lighting, side profile
|
36 |
+
- tr1gg3r 3D-rendered, digital patterns, hooded cloak, seated cross-legged, calm, meditation chamber, low ambient lighting, front view
|
37 |
+
- tr1gg3r anime-style, school uniform with blue necktie, standing with arms behind back, gentle smile, classroom, soft daylight, three-quarter view
|
38 |
+
- tr1gg3r photorealistic, long trench coat and combat boots, walking, determined, rain-soaked street, dramatic shadows, low-angle view
|