Upload 2 files
Using a quantised version of Gemma 2 9B (neuralmagic/gemma-2-9b-it-quantized.w4a16)
- app.py +2 -2
- requirements.txt +2 -0
app.py
CHANGED
@@ -161,8 +161,8 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # #model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME)
 # model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME).to(device)
 
-
-model_id = "TheBloke/Gemma-2-7B-IT-GGUF"
+model_id = "neuralmagic/gemma-2-9b-it-quantized.w4a16"
+#model_id = "TheBloke/Gemma-2-7B-IT-GGUF"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
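For reference, a minimal standalone sketch of the loading path after this change, assuming the from_pretrained call continues with standard arguments (only the lines in the hunk above come from the actual app.py; the dtype/device handling below is an assumption). Checkpoints published in Neural Magic's compressed-tensors w4a16 format are read by transformers via the compressed-tensors package, which is why it is added to requirements.txt in the same commit.

# Hypothetical sketch, not the Space's full app.py; extra arguments are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "neuralmagic/gemma-2-9b-it-quantized.w4a16"
#model_id = "TheBloke/Gemma-2-7B-IT-GGUF"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",   # assumption: take the dtype from the checkpoint config
    device_map="auto",    # assumption: requires accelerate; places weights on available GPU/CPU
)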
requirements.txt
CHANGED
@@ -13,4 +13,6 @@ openai-whisper
 uuid
 textwrap3
 python-dotenv
+pyngrok
+compressed-tensors
 
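The two new entries line up with the model swap above: compressed-tensors is the library transformers relies on to load checkpoints quantized in the compressed-tensors (w4a16) format, while pyngrok is presumably used to expose the app through an ngrok tunnel and is unrelated to the model change. A quick sanity check that both dependencies install cleanly (hypothetical, not part of the Space):

# Hypothetical import check for the two new dependencies.
import compressed_tensors  # needed by transformers for the w4a16 checkpoint
import pyngrok             # ngrok tunnelling helper
print("dependencies import OK")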