ysharma HF Staff FalconLLM committed on
Commit
7f591f7
·
0 Parent(s):

Duplicate from tiiuae/falcon-180b-demo

Browse files

Co-authored-by: Falcon LLM TII UAE <FalconLLM@users.noreply.huggingface.co>

Files changed (6) hide show
  1. .gitattributes +34 -0
  2. README.md +10 -0
  3. app.py +145 -0
  4. better_banner.jpeg +0 -0
  5. home-banner.jpg +0 -0
  6. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Falcon-180B Demo
3
+ emoji: 💬
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.42.0
8
+ app_file: app.py
9
+ duplicated_from: tiiuae/falcon-180b-demo
10
+ ---
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import requests
5
+
6
+ import gradio as gr
7
+ from huggingface_hub import Repository, InferenceClient
8
+
# Access token for the hosted inference endpoint; None when the HF_TOKEN
# environment variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Inference endpoint serving the chat model.
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
# Display name of the assistant.
BOT_NAME = "Falcon"

# Generation is cut off (and the marker trimmed from the reply) as soon as the
# streamed text ends with one of these strings.
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

# Example prompts offered in the chat UI; each entry is a one-element list.
EXAMPLES = [
    ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
    ["What's the Everett interpretation of quantum mechanics?"],
    ["Give me a list of the top 10 dive sites you would recommend around the world."],
    ["Can you tell me more about deep-water soloing?"],
    ["Can you write a short tweet about the release of our latest AI model, Falcon LLM?"],
]
22
+
# Shared client for the hosted model. The Authorization header is only sent
# when a token is configured; formatting an unset token would otherwise send
# the literal string "Bearer None".
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None,
)
27
+
def format_prompt(message, history, system_prompt):
    """Build the plain-text prompt sent to the model.

    The prompt is a transcript with one line per turn:

        System: <system_prompt>      (only when system_prompt is non-empty)
        User: <earlier user turn>
        Falcon: <earlier bot reply>
        ...
        User: <message>
        Falcon:

    Args:
        message: The new user message to answer.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns; ``None`` is treated as an empty history.
        system_prompt: Optional system instruction; skipped when falsy.

    Returns:
        The prompt string, ending with ``"Falcon:"`` so the model continues
        with the assistant's reply.
    """
    lines = []
    if system_prompt:
        lines.append(f"System: {system_prompt}")
    for user_prompt, bot_response in history or []:
        lines.append(f"User: {user_prompt}")
        # Stored replies are bare text, so the speaker prefix is added here.
        lines.append(f"Falcon: {bot_response}")
    lines.append(f"User: {message}")
    lines.append("Falcon:")
    return "\n".join(lines)
38
+
# Sampling seed shared by all requests; incremented after every call so that
# consecutive generations do not reuse the same seed.
seed = 42


def generate(
    prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream the model's reply to ``prompt`` as a growing string.

    Args:
        prompt: The new user message.
        history: Earlier ``(user, bot)`` turns, passed to ``format_prompt``.
        system_prompt: Optional system instruction.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to an
            integer of at least 1.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        The reply accumulated so far, once per streamed token, with any
        trailing stop sequence (and the whitespace before it) removed.
    """
    global seed

    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),
        # The UI slider can deliver 0 (or a float); a request for zero new
        # tokens cannot produce any text, so ask for at least one.
        max_new_tokens=max(int(max_new_tokens), 1),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed = seed + 1
    formatted_prompt = format_prompt(prompt, history, system_prompt)

    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""
    for response in stream:
        output += response.token.text
        # The stop marker itself is part of the streamed text; drop it so it
        # never shows up in the chat window.
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                output = output[: -len(stop_str)].rstrip()
        yield output
74
+
75
+
# Extra controls shown under the chat box. Their order must match the optional
# parameters of `generate` after (prompt, history): system prompt, temperature,
# max new tokens, top-p, repetition penalty.
additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        # One step (64) is the smallest useful setting; a minimum of 0 would
        # let the user request zero new tokens.
        minimum=64,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]
115
+
116
+
# Page layout: a banner image next to the introductory text, followed by the
# chat interface wired to `generate`.
with gr.Blocks() as demo:
    with gr.Row():
        # Integer scales 2 and 5 keep the banner/text width ratio of 0.4 : 1
        # without using a fractional scale value.
        with gr.Column(scale=2):
            gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
        with gr.Column(scale=5):
            gr.Markdown(
                """# Falcon-180B Demo

**Chat with [Falcon-180B-Chat](https://huggingface.co/tiiuae/falcon-180b-chat), brainstorm ideas, discuss your holiday plans, and more!**

✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) and finetuned on a mixture of [Ultrachat](https://huggingface.co/datasets/stingning/ultrachat), [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) and [Airoboros](https://huggingface.co/datasets/jondurbin/airoboros-2.1). [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b) is a state-of-the-art large language model built by the [Technology Innovation Institute](https://www.tii.ae) in Abu Dhabi. It is trained on 3.5 trillion tokens (including [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)) and available under the [Falcon-180B TII License](https://huggingface.co/spaces/tiiuae/falcon-180b-license/blob/main/LICENSE.txt). It currently holds the 🥇 1st place on the [🤗 Open LLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) for a pretrained model.

🧪 This is only a **first experimental preview**: we intend to provide increasingly capable versions of Falcon in the future, based on improved datasets and RLHF/RLAIF.

👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)

➡️️ **Intended Use**: this demo is intended to showcase an early finetuning of [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b), to illustrate the impact (and limitations) of finetuning on a dataset of conversations and instructions. We encourage the community to further build upon the base model, and to create even better instruct/chat versions!

⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
"""
            )

    gr.ChatInterface(
        generate,
        examples=EXAMPLES,
        additional_inputs=additional_inputs,
    )

# Queue requests (up to 100 handled concurrently) and start the server with
# the programmatic API hidden and closed.
demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)
better_banner.jpeg ADDED
home-banner.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ huggingface_hub
2
+ gradio==3.42.0
3
+ text-generation==0.4.1