farrell236 committed
Commit 38572a7 · 1 Parent(s): 6d2e9dd
Files changed (2):
  1. .gitignore +1 -0
  2. app.py +17 -12
.gitignore ADDED
@@ -0,0 +1 @@
+.idea
app.py CHANGED
@@ -11,12 +11,11 @@ from threading import Thread
 
 import gradio as gr
 import torch
-import spaces
 from qwen_vl_utils import process_vision_info
 from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
 
-# DEFAULT_CKPT_PATH = 'farrell236/test_model'
-DEFAULT_CKPT_PATH = 'Qwen/Qwen2.5-VL-32B-Instruct'
+DEFAULT_CKPT_PATH = 'farrell236/test_model'
+# DEFAULT_CKPT_PATH = '/scratch/llm-weights/Qwen/Qwen2.5-VL-7B-Instruct'
 AUTH_TOKEN = os.environ.get("HF_spaces")
 
 def _get_args():
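
Note: the default checkpoint flips from the Qwen hub repo to farrell236/test_model, with a local 7B path left commented out, and AUTH_TOKEN is read from the HF_spaces Space secret. For context, a minimal sketch of how the checkpoint path and token are presumably consumed downstream (the actual loading code is outside this diff; torch_dtype and device_map are assumptions):

import os
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

DEFAULT_CKPT_PATH = 'farrell236/test_model'
AUTH_TOKEN = os.environ.get("HF_spaces")  # access token stored as a Space secret

# Sketch only: dtype and device placement are assumptions, not part of the diff.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    DEFAULT_CKPT_PATH,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    token=AUTH_TOKEN,  # required if the checkpoint repo is private
)
processor = AutoProcessor.from_pretrained(DEFAULT_CKPT_PATH, token=AUTH_TOKEN)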
@@ -148,10 +147,11 @@ def _transform_messages(original_messages):
 
 def _launch_demo(args, model, processor):
 
-    # @spaces.GPU
     def call_local_model(model, processor, messages,
-                         max_tokens=1024, temperature=0.6,
-                         top_p=0.9, top_k=50,
+                         max_tokens=1024,
+                         temperature=0.6,
+                         top_p=0.9,
+                         top_k=50,
                          repetition_penalty=1.2):
 
         messages = _transform_messages(messages)
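
Deleting the commented-out decorator here matches dropping `import spaces` in the first hunk: the demo no longer carries the ZeroGPU hook. On a ZeroGPU Space that decorator would have requested a GPU only for the duration of each call, roughly (sketch, assuming the HF `spaces` helper package):

import spaces  # HF Spaces helper; only meaningful on ZeroGPU hardware

@spaces.GPU  # allocate a GPU for the duration of each decorated call
def call_local_model(model, processor, messages, **gen_kwargs):
    ...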
@@ -164,8 +164,8 @@ def _launch_demo(args, model, processor):
         tokenizer = processor.tokenizer
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-        gen_kwargs = {'max_new_tokens': max_tokens,
-                      'streamer': streamer,
+        gen_kwargs = {'streamer': streamer,
+                      'max_new_tokens': max_tokens,
                       'temperature': temperature,
                       'top_p': top_p,
                       'top_k': top_k,
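
Reordering the dict keys is cosmetic; what matters is the pattern these kwargs feed: gen_kwargs goes to model.generate on a worker thread (hence `from threading import Thread` in the hunk-1 context) while the TextIteratorStreamer is consumed on the main thread. A minimal sketch, assuming model_inputs already holds the tokenized text and vision tensors:

from threading import Thread

gen_kwargs = {'streamer': streamer,
              'max_new_tokens': max_tokens,
              'temperature': temperature,
              'top_p': top_p,
              'top_k': top_k,
              'repetition_penalty': repetition_penalty,
              **model_inputs}  # assumed name for the prepared inputs

Thread(target=model.generate, kwargs=gen_kwargs).start()

generated_text = ''
for new_text in streamer:  # yields decoded text chunks as they are produced
    generated_text += new_text
    yield generated_text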
@@ -209,7 +209,12 @@ def _launch_demo(args, model, processor):
                 content = []
         messages.pop()
 
-        for response in call_local_model(model, processor, messages):
+        for response in call_local_model(model, processor, messages,
+                                         max_tokens=max_tokens,
+                                         temperature=temperature,
+                                         top_p=top_p,
+                                         top_k=top_k,
+                                         repetition_penalty=repetition_penalty):
             _chatbot[-1] = (_parse_text(chat_query), _remove_image_special(_parse_text(response)))
 
             yield _chatbot
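
This hunk is the functional fix of the commit: previously call_local_model was invoked without any keyword arguments, so the values collected from the UI sliders were silently discarded and generation always ran with the signature defaults. For the forwarding to work, predict has to accept the slider values in the order they are wired below; a sketch of the assumed signature (the full body is outside this hunk):

def predict(_chatbot, task_history,
            max_tokens, temperature, top_p, top_k, repetition_penalty):
    ...
    for response in call_local_model(model, processor, messages,
                                     max_tokens=max_tokens,
                                     temperature=temperature,
                                     top_p=top_p,
                                     top_k=top_k,
                                     repetition_penalty=repetition_penalty):
        ...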
@@ -281,7 +286,7 @@ def _launch_demo(args, model, processor):
         chatbot = gr.Chatbot(label='Qwen2.5-VL', elem_classes='control-height', height=500)
 
         with gr.Accordion("Generation Parameters", open=False):
-            max_tokens = gr.Slider(64, 4096, value=512, step=64, label="Max Tokens")
+            max_tokens = gr.Slider(64, 4096, value=1024, step=64, label="Max Tokens")
             temperature = gr.Slider(0.0, 2.0, value=0.6, step=0.1, label="Temperature")
             top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p (nucleus sampling)")
             top_k = gr.Slider(0, 100, value=50, step=1, label="Top-k")
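
Raising the slider's initial value from 512 to 1024 aligns the UI default with call_local_model's max_tokens=1024 default from the signature hunk above, so out-of-the-box behavior is unchanged now that the slider value is actually forwarded.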
@@ -299,8 +304,8 @@ def _launch_demo(args, model, processor):
         submit_btn.click(add_text,
                          [chatbot, task_history, query],
                          [chatbot, task_history]).then(predict,
-                                                       [chatbot, task_history, max_tokens,
-                                                        temperature, top_p, top_k, repetition_penalty],
+                                                       [chatbot, task_history,
+                                                        max_tokens, temperature, top_p, top_k, repetition_penalty],
                                                        [chatbot], show_progress=True)
         submit_btn.click(reset_user_input, [], [query])
         empty_bin.click(reset_state, [chatbot, task_history], [chatbot], show_progress=True)
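
Regrouping the input list is cosmetic, but the order is load-bearing: Gradio passes component values to the handler positionally, so the list must match predict's parameter order, and .then() only fires predict after add_text has committed the user message to history. A self-contained toy illustration of that chaining (function bodies are placeholders, not the app's code):

import gradio as gr

def add_text(history, text):
    return history + [(text, None)]

def predict(history, max_tokens):
    history[-1] = (history[-1][0], f'(would generate up to {max_tokens} tokens)')
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    query = gr.Textbox()
    max_tokens = gr.Slider(64, 4096, value=1024, step=64)
    send = gr.Button('Submit')
    # .then() chains events; slider values arrive positionally.
    send.click(add_text, [chatbot, query], [chatbot]).then(
        predict, [chatbot, max_tokens], [chatbot])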
 