Update opensource_inference.py
opensource_inference.py CHANGED (+49 -9)
@@ -1,3 +1,5 @@
+%%writefile main.py
+import argparse
 import os
 import json
 import numpy as np
@@ -452,19 +454,57 @@ Conversation:
     }
 
 
-# Example usage
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Sales Conversion Predictor")
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default="/content/sales-conversion-model-reinf-learning/sales_conversion_model",
+        help="Path to the trained PPO model zip file."
+    )
+    parser.add_argument(
+        "--embedding_model_name",
+        type=str,
+        default="BAAI/bge-m3",  # Defaulting to bge-m3 as per example
+        help="Name of the Hugging Face embedding model (e.g., 'BAAI/bge-m3', 'BAAI/bge-large-en-v1.5')."
+    )
+    parser.add_argument(
+        "--llm_gguf_path",
+        type=str,
+        default="unsloth/gemma-3-4b-it-GGUF",  # Defaulting to a repo ID as per example
+        help="Path to the GGUF LLM model file, a local directory containing GGUF files, or a HuggingFace repo_id."
+    )
+    parser.add_argument(
+        "--no_gpu",
+        action="store_true",
+        help="Disable GPU usage (use CPU only)."
+    )
+    parser.add_argument(
+        "--n_gpu_layers",
+        type=int,
+        default=-1,  # Default to all layers on GPU for llama.cpp
+        help="Number of LLM layers to offload to GPU. -1 for all, 0 for none."
+    )
+    parser.add_argument(
+        "--n_ctx",
+        type=int,
+        default=2048,
+        help="Context window size for the LLM."
+    )
+
+    args = parser.parse_args()
+
     # Initialize predictor with GGUF model
     predictor = SalesConversionPredictor(
-        model_path=
-        embedding_model_name=
-        llm_gguf_path=
-        use_gpu=
-        n_gpu_layers=
-        n_ctx=
-        use_mini_embeddings=True
+        model_path=args.model_path,
+        embedding_model_name=args.embedding_model_name,
+        llm_gguf_path=args.llm_gguf_path,
+        use_gpu=not args.no_gpu,
+        n_gpu_layers=args.n_gpu_layers,
+        n_ctx=args.n_ctx,
+        use_mini_embeddings=True  # Kept from original, PPO model should match this if it affects state vector.
+        # Currently, embedding dim is fixed at 1024 in code.
     )
-
     # Test with different conversation scenarios
     scenarios = [
         {