Spaces:

GilbertAkham
/

GJAI-R1

Sleeping

App Files Files Community

GilbertAkham committed on Oct 31, 2025

Commit

a4a40af

verified ·

1 Parent(s): f36d844

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -47

app.py CHANGED Viewed

@@ -1,85 +1,117 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 # -------------------------------------------------
-# Inference function (using text generation)
 # -------------------------------------------------
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
     """
-    Chat interface for Gilbert Multitask Reasoning Model.
-    Uses text_generation() because model doesn't support chat_completion().
     """
-    client = InferenceClient(
-        token=hf_token.token,
-        model="GilbertAkham/deepseek-R1-multitask-lora"
-    )
-    # Build conversation prompt manually
-    history_text = ""
     for turn in history:
-        history_text += f"User: {turn['role']}\n{turn['content']}\n"
-    prompt = f"{system_message}\n\n{history_text}\nUser: {message}\nAssistant:"
-    response = ""
-    for token in client.text_generation(
-        prompt,
-        max_new_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        response += token
-        yield response
 # -------------------------------------------------
-# Gradio ChatInterface setup
 # -------------------------------------------------
 chatbot = gr.ChatInterface(
-    respond,
     type="messages",
     additional_inputs=[
         gr.Textbox(
             value=(
-                "You are Reasoning-Bot, a multitask reasoning AI trained for "
-                "chatting, summarization, creative writing, reasoning, and formal replies."
             ),
-            label="🧠 System Message"
         ),
-        gr.Slider(minimum=64, maximum=2048, value=512, step=16, label="📏 Max New Tokens"),
-        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="🌡️ Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="🎯 Top-p (nucleus sampling)"),
     ],
 )
 # -------------------------------------------------
-# Layout with sidebar
 # -------------------------------------------------
 with gr.Blocks(title="Gilbert Multitask Reasoning AI") as demo:
     with gr.Sidebar():
-        gr.Markdown("## 🔐 Authentication")
-        gr.LoginButton()
         gr.Markdown(
             """
-            ### 💡 About
-            - Model: **GilbertAkham/deepseek-R1-multitask-lora**
-            - Base: DeepSeek-R1-Distill-Qwen-1.5B
-            - Tasks: Chat, email, reasoning, story continuation, summarization.
             """
         )
     chatbot.render()
 # -------------------------------------------------
-# Launch Space
 # -------------------------------------------------
 if __name__ == "__main__":
     demo.launch()

+import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftModel
 # -------------------------------------------------
+# MODEL LOADING
 # -------------------------------------------------
# Hub identifiers: the base checkpoint and the LoRA adapter applied on top of it.
BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
ADAPTER_MODEL = "GilbertAkham/deepseek-R1-multitask-lora"
print("🔄 Loading base model and LoRA adapter...")

# 4-bit NF4 settings with double quantization and float16 compute. The config is
# always built, but it is only handed to the loader when a CUDA device exists.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                   # load in 4-bit for GPU memory efficiency
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse EOS as the padding token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# The 4-bit / float16 settings target GPU execution. On a host without CUDA,
# load the base model unquantized in float32 instead of forcing the 4-bit path.
_use_cuda = torch.cuda.is_available()
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_config if _use_cuda else None,
    torch_dtype=torch.float16 if _use_cuda else torch.float32,
    # NOTE(review): trust_remote_code allows the Hub repository to run its own
    # Python code at load time — confirm this checkpoint actually requires it.
    trust_remote_code=True,
)

# Wrap the base model with the LoRA adapter weights and put it in evaluation mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()
print("✅ Model and tokenizer loaded successfully!")
+# -------------------------------------------------
+# CHAT / GENERATION FUNCTION
+# -------------------------------------------------
def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """
    Generate one assistant reply with the multitask LoRA model.

    Args:
        message: The new user message to answer.
        history: Earlier conversation as passed by ``gr.ChatInterface`` with
            ``type="messages"``: a list of dicts carrying ``role`` and
            ``content`` keys.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Upper bound on newly generated tokens (coerced to ``int``).
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens, stripped of
        surrounding whitespace.
    """
    # Build a plain-text transcript. Each history entry is a single message, so
    # it is labelled by its own role rather than treated as a user/reply pair.
    parts = [f"{system_message}\n\n"]
    for turn in history:
        speaker = "Assistant" if turn.get("role") == "assistant" else "User"
        parts.append(f"{speaker}: {turn['content']}\n")
    parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(parts)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be cut off afterwards.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),  # slider values may arrive as floats
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1,
        )

    # Decode only the tokens produced after the prompt. Splitting the full text
    # on "Assistant:" would drop content whenever the model emits that marker.
    new_tokens = output[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
 # -------------------------------------------------
+# GRADIO CHAT INTERFACE
 # -------------------------------------------------
 # Chat UI bound to generate_response. The additional inputs below line up, in
 # order, with its system_message, max_tokens, temperature and top_p parameters.
 chatbot = gr.ChatInterface(
     fn=generate_response,
     type="messages",
     additional_inputs=[
         # System prompt, editable by the user.
         gr.Textbox(
             label="🧠 System Message",
             value=(
                 "You are Reasoning-Bot, a helpful and logical assistant trained for "
                 "reasoning, chatting, summarization, story continuation, and report writing."
             ),
         ),
         # Generation controls.
         gr.Slider(minimum=64, maximum=2048, value=512, step=16, label="📏 Max New Tokens"),
         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="🌡️ Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="🎯 Top-p"),
     ],
 )
 # -------------------------------------------------
+# UI LAYOUT
 # -------------------------------------------------
 # Page layout: an informational sidebar plus the chat interface defined above.
 with gr.Blocks(title="Gilbert Multitask Reasoning AI") as demo:
     # Sidebar content is static Markdown describing the model.
     with gr.Sidebar():
         gr.Markdown(value="## 💡 About This App")
         gr.Markdown(
             value="""
             - **Model:** `GilbertAkham/deepseek-R1-multitask-lora`
             - **Base:** `deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B`
             - **Capabilities:**
               🧩 Reasoning, 🗣️ Chat, 📧 Email writing, 📚 Summarization, ✍️ Story continuation, 🧾 Report generation
             - **Runs locally** (no Inference API required).
             """
         )
     # Place the previously constructed ChatInterface inside this Blocks page.
     chatbot.render()
 # -------------------------------------------------
+# LAUNCH
 # -------------------------------------------------
 # Launch the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()