Jaamie committed on
Commit
2e238f3
·
verified ·
1 Parent(s): 69dd81f

Upload 2 files

Browse files

Using a quantised version of Gemma-2-9B (neuralmagic/gemma-2-9b-it-quantized.w4a16)

Files changed (2) hide show
  1. app.py +2 -2
  2. requirements.txt +2 -0
app.py CHANGED
@@ -161,8 +161,8 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
161
  # #model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME)
162
  # model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME).to(device)
163
 
164
- #model_id = "neuralmagic/gemma-2-9b-it-quantized.w4a16"
165
- model_id = "TheBloke/Gemma-2-7B-IT-GGUF"
166
  tokenizer = AutoTokenizer.from_pretrained(model_id)
167
  model = AutoModelForCausalLM.from_pretrained(
168
  model_id,
 
161
  # #model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME)
162
  # model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME).to(device)
163
 
164
+ model_id = "neuralmagic/gemma-2-9b-it-quantized.w4a16"
165
+ #model_id = "TheBloke/Gemma-2-7B-IT-GGUF"
166
  tokenizer = AutoTokenizer.from_pretrained(model_id)
167
  model = AutoModelForCausalLM.from_pretrained(
168
  model_id,
requirements.txt CHANGED
@@ -13,4 +13,6 @@ openai-whisper
13
  uuid
14
  textwrap3
15
  python-dotenv
 
 
16
 
 
13
  uuid
14
  textwrap3
15
  python-dotenv
16
+ pyngrok
17
+ compressed-tensors
18