from transformers import AutoTokenizer
import gradio as gr
import os

# Hugging Face access token, supplied as a secret by the hosting environment.
# Resolves to None when unset — presumably fine for public model repos (TODO confirm).
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")

def _get_tokenizers():
    """Return the (Qwen, DeepSeek) tokenizer pair, loading them on first use.

    Reuses the module-level ``qwen_tokenizer`` / ``deepseek_tokenizer`` globals
    when they already exist (e.g. created by the ``__main__`` block), so
    behaviour is unchanged when the script is run directly.  Loading lazily
    here also fixes a NameError when this module is merely imported (e.g. by
    ``gradio`` auto-reload), where the ``__main__`` guard never executes.
    """
    global qwen_tokenizer, deepseek_tokenizer
    try:
        return qwen_tokenizer, deepseek_tokenizer
    except NameError:
        qwen_tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen2.5-0.5B", token=huggingface_token
        )
        deepseek_tokenizer = AutoTokenizer.from_pretrained(
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", token=huggingface_token
        )
        return qwen_tokenizer, deepseek_tokenizer


def tokenize(input_text: str) -> str:
    """Tokenize *input_text* with both models and report per-model token counts.

    Args:
        input_text: Raw text to tokenize.

    Returns:
        One ``"model: count"`` line per model, sorted by token count in
        descending order.
    """
    qwen_tok, deepseek_tok = _get_tokenizers()
    results = {
        "Qwen2.5-0.5B": len(
            qwen_tok(input_text, add_special_tokens=True)["input_ids"]
        ),
        "DeepSeek-R1-Distill-Qwen-1.5B": len(
            deepseek_tok(input_text, add_special_tokens=True)["input_ids"]
        ),
    }
    # Largest token count first, matching the original display order.
    sorted_results = sorted(results.items(), key=lambda item: item[1], reverse=True)
    return "\n".join(f"{model}: {tokens}" for model, tokens in sorted_results)


if __name__ == "__main__":
    # Load both tokenizers up front so the first request doesn't pay the
    # download/initialisation cost.
    qwen_tokenizer = AutoTokenizer.from_pretrained(
        "Qwen/Qwen2.5-0.5B", token=huggingface_token
    )
    deepseek_tokenizer = AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", token=huggingface_token
    )

    # Simple single-textbox UI: text in, token-count report out.
    text_input = gr.Textbox(label="Input Text", lines=19)
    demo = gr.Interface(fn=tokenize, inputs=text_input, outputs="text")
    demo.launch()