GilbertAkham committed on
Commit
a4a40af
·
verified ·
1 Parent(s): f36d844

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -47
app.py CHANGED
@@ -1,85 +1,117 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
3
 
4
  # -------------------------------------------------
5
- # Inference function (using text generation)
6
  # -------------------------------------------------
7
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """
    Stream a reply from the Gilbert Multitask Reasoning Model.

    Uses text_generation() because the model doesn't support
    chat_completion(), so the conversation is flattened into one prompt.

    Args:
        message: The new user message.
        history: Earlier turns as dicts with ``role`` and ``content`` keys.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Forwarded as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to the client.
        top_p: Nucleus-sampling threshold forwarded to the client.
        hf_token: OAuth token object; its ``token`` attribute authenticates
            the inference client.

    Yields:
        The accumulated response text after each streamed token.
    """
    client = InferenceClient(
        token=hf_token.token,
        model="GilbertAkham/deepseek-R1-multitask-lora",
    )

    # Label each turn with its real speaker. The previous code wrote
    # "User: <role>" for every turn, so assistant replies were attributed
    # to the user and the role name leaked into the prompt text.
    speakers = {"user": "User", "assistant": "Assistant"}
    history_text = "".join(
        f"{speakers.get(turn['role'], turn['role'])}: {turn['content']}\n"
        for turn in history
    )

    prompt = f"{system_message}\n\n{history_text}\nUser: {message}\nAssistant:"

    response = ""
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += token
        yield response
42
 
43
 
44
  # -------------------------------------------------
45
- # Gradio ChatInterface setup
46
  # -------------------------------------------------
47
# Chat UI backed by ``respond``; the extra inputs supply the system message
# and the sampling controls.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(
            value=(
                "You are Reasoning-Bot, a multitask reasoning AI trained for "
                "chatting, summarization, creative writing, reasoning, and formal replies."
            ),
            label="🧠 System Message",
        ),
        gr.Slider(minimum=64, maximum=2048, value=512, step=16, label="📏 Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="🌡️ Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="🎯 Top-p (nucleus sampling)"),
    ],
)

# -------------------------------------------------
# Layout with sidebar
# -------------------------------------------------
with gr.Blocks(title="Gilbert Multitask Reasoning AI") as demo:
    with gr.Sidebar():
        # The login button provides the OAuth token that ``respond`` receives.
        gr.Markdown("## 🔐 Authentication")
        gr.LoginButton()
        gr.Markdown(
            """
            ### 💡 About
            - Model: **GilbertAkham/deepseek-R1-multitask-lora**
            - Base: DeepSeek-R1-Distill-Qwen-1.5B
            - Tasks: Chat, email, reasoning, story continuation, summarization.
            """
        )
    chatbot.render()

# -------------------------------------------------
# Launch Space
# -------------------------------------------------
if __name__ == "__main__":
    demo.launch()
 
1
+ import torch
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
+ from peft import PeftModel
5
 
6
  # -------------------------------------------------
7
+ # MODEL LOADING
8
  # -------------------------------------------------
9
BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
ADAPTER_MODEL = "GilbertAkham/deepseek-R1-multitask-lora"

print("🔄 Loading base model and LoRA adapter...")

# The 4-bit path is only used when a CUDA device is present, so the app can
# still start (unquantized, float32) on a CPU-only host.
_HAS_CUDA = torch.cuda.is_available()

bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,  # load in 4-bit for GPU memory efficiency
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    if _HAS_CUDA
    else None
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    # Reuse EOS as the padding token when the tokenizer defines none.
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): trust_remote_code=True permits running code shipped in the
# model repository; confirm it is actually required for this base model.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16 if _HAS_CUDA else torch.float32,
    trust_remote_code=True,
)

# Attach the LoRA adapter on top of the base weights and switch to eval mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()

print("✅ Model and tokenizer loaded successfully!")
37
+
38
+
39
+ # -------------------------------------------------
40
+ # CHAT / GENERATION FUNCTION
41
+ # -------------------------------------------------
42
_ROLE_LABELS = {"user": "User", "assistant": "Assistant"}


def _build_prompt(message, history, system_message):
    """Flatten the system message, earlier turns and the new message into one prompt."""
    parts = [f"{system_message}\n\n"]
    for turn in history:
        # History items carry "role" and "content" (messages format); there
        # is no "response" key, so each turn is labelled by its own role.
        role = turn.get("role", "user")
        label = _ROLE_LABELS.get(role, str(role).capitalize())
        parts.append(f"{label}: {turn.get('content', '')}\n")
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)


def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """
    Generates text using the multitask LoRA model.
    Supports chat, reasoning, summarization, storytelling, etc.

    Args:
        message: The new user message.
        history: Earlier turns as dicts with ``role`` and ``content`` keys.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Upper bound on newly generated tokens (cast to int).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        The assistant's reply as a stripped string.
    """
    # Construct a conversation-style prompt
    prompt = _build_prompt(message, history, system_message)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),  # UI sliders may deliver floats
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # text on "Assistant:" would cut off any reply that contains that marker.
    new_tokens = output[0][prompt_length:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return answer
 
 
 
 
 
 
 
 
 
72
 
73
 
74
  # -------------------------------------------------
75
+ # GRADIO CHAT INTERFACE
76
  # -------------------------------------------------
77
# Chat UI backed by ``generate_response``; the extra inputs supply the system
# message and the sampling controls.
chatbot = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    additional_inputs=[
        gr.Textbox(
            value=(
                "You are Reasoning-Bot, a helpful and logical assistant trained for "
                "reasoning, chatting, summarization, story continuation, and report writing."
            ),
            label="🧠 System Message",
        ),
        gr.Slider(64, 2048, value=512, step=16, label="📏 Max New Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="🌡️ Temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="🎯 Top-p"),
    ],
)


# -------------------------------------------------
# UI LAYOUT
# -------------------------------------------------
with gr.Blocks(title="Gilbert Multitask Reasoning AI") as demo:
    with gr.Sidebar():
        gr.Markdown("## 💡 About This App")
        gr.Markdown(
            """
            - **Model:** `GilbertAkham/deepseek-R1-multitask-lora`
            - **Base:** `deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B`
            - **Capabilities:**
              🧩 Reasoning, 🗣️ Chat, 📧 Email writing, 📚 Summarization, ✍️ Story continuation, 🧾 Report generation
            - **Runs locally** (no Inference API required).
            """
        )
    chatbot.render()


# -------------------------------------------------
# LAUNCH
# -------------------------------------------------
if __name__ == "__main__":
    demo.launch()