DeepMostInnovations committed on
Commit f3d1e0c · verified · 1 Parent(s): 0dc313a

Update opensource_inference.py

Files changed (1):
1. opensource_inference.py (+49, -9)
opensource_inference.py CHANGED
@@ -1,3 +1,5 @@
+%%writefile main.py
+import argparse
 import os
 import json
 import numpy as np
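Note: the added first line, %%writefile main.py, is an IPython cell magic rather than Python syntax, so the script now appears intended to be pasted into a Colab/Jupyter cell, where the magic writes the cell body to main.py instead of executing it; running the file directly with the plain Python interpreter would raise a SyntaxError on that line. A minimal sketch of the implied notebook workflow, shown as two hypothetical cells (the magic must be the very first line of its cell):

%%writefile main.py
# --- Colab cell 1: the magic above writes this cell body to main.py, not run
print("hello from main.py")

# --- Colab cell 2: execute the file that cell 1 wrote out
!python main.py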
@@ -452,19 +454,57 @@ Conversation:
 }


-# Example usage
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Sales Conversion Predictor")
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default="/content/sales-conversion-model-reinf-learning/sales_conversion_model",
+        help="Path to the trained PPO model zip file."
+    )
+    parser.add_argument(
+        "--embedding_model_name",
+        type=str,
+        default="BAAI/bge-m3",  # Defaulting to bge-m3 as per example
+        help="Name of the Hugging Face embedding model (e.g., 'BAAI/bge-m3', 'BAAI/bge-large-en-v1.5')."
+    )
+    parser.add_argument(
+        "--llm_gguf_path",
+        type=str,
+        default="unsloth/gemma-3-4b-it-GGUF",  # Defaulting to a repo ID as per example
+        help="Path to the GGUF LLM model file, a local directory containing GGUF files, or a Hugging Face repo_id."
+    )
+    parser.add_argument(
+        "--no_gpu",
+        action="store_true",
+        help="Disable GPU usage (use CPU only)."
+    )
+    parser.add_argument(
+        "--n_gpu_layers",
+        type=int,
+        default=-1,  # Default to all layers on GPU for llama.cpp
+        help="Number of LLM layers to offload to GPU. -1 for all, 0 for none."
+    )
+    parser.add_argument(
+        "--n_ctx",
+        type=int,
+        default=2048,
+        help="Context window size for the LLM."
+    )
+
+    args = parser.parse_args()
+
     # Initialize predictor with GGUF model
     predictor = SalesConversionPredictor(
-        model_path="/content/sales-conversion-model-reinf-learning/sales_conversion_model",  # path to the model
-        embedding_model_name="BAAI/bge-m3",
-        llm_gguf_path="unsloth/gemma-3-4b-it-GGUF",  # Update this path!
-        use_gpu=True,
-        n_gpu_layers=20,  # Use all layers on GPU
-        n_ctx=2048,  # Context window size
-        use_mini_embeddings=True  # Set to match how the model was trained
+        model_path=args.model_path,
+        embedding_model_name=args.embedding_model_name,
+        llm_gguf_path=args.llm_gguf_path,
+        use_gpu=not args.no_gpu,
+        n_gpu_layers=args.n_gpu_layers,
+        n_ctx=args.n_ctx,
+        use_mini_embeddings=True  # Kept from original; the PPO model should match this if it affects the state vector.
+                                  # Currently, embedding dim is fixed at 1024 in code.
     )
-
     # Test with different conversation scenarios
     scenarios = [
         {
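For reference, a hypothetical invocation of the new command-line interface from a Colab cell (flag names and defaults are taken from the diff above; the main.py filename assumes the %%writefile cell has already been run). Note the flag inversion: with no flag, args.no_gpu is False, so use_gpu=not args.no_gpu stays True; passing --no_gpu disables the GPU.

# Run with explicit flags (every flag has a default, so all of these are optional)
!python main.py --model_path /content/sales-conversion-model-reinf-learning/sales_conversion_model --embedding_model_name BAAI/bge-m3 --llm_gguf_path unsloth/gemma-3-4b-it-GGUF --n_gpu_layers -1 --n_ctx 2048

# CPU-only run: --no_gpu sets use_gpu=False; offload zero layers to match
!python main.py --no_gpu --n_gpu_layers 0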