GilbertAkham committed on
Commit 5297ff3 · verified · 1 Parent(s): 3590b90

Update handler.py

Files changed (1): handler.py  +21 -5
handler.py CHANGED
@@ -1,12 +1,18 @@
-# handler.py
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 from huggingface_hub import snapshot_download
 
+# === Base & adapter config ===
 BASE_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 ADAPTER_PATH = "GilbertAkham/deepseek-R1-multitask-lora"
 
+# === System message (always prepended to input) ===
+SYSTEM_PROMPT = (
+    "You are Chat-Bot, a helpful and logical assistant trained for reasoning, "
+    "email, chatting, summarization, story continuation, and report writing.\n\n"
+)
+
 class EndpointHandler:
     def __init__(self, path=""):
         print("🚀 Loading base model...")
@@ -30,17 +36,27 @@ class EndpointHandler:
         print("✅ Model + LoRA adapter loaded successfully.")
 
     def __call__(self, data):
+        # === Combine system + user prompt ===
         prompt = data.get("inputs", "")
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        full_prompt = SYSTEM_PROMPT + prompt
+
+        params = data.get("parameters", {})
+        max_new_tokens = params.get("max_new_tokens", 512)
+        temperature = params.get("temperature", 0.7)
+        top_p = params.get("top_p", 0.9)
+
+        # === Tokenize and run generation ===
+        inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=512,
-                temperature=0.7,
-                top_p=0.9,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
                 eos_token_id=self.tokenizer.eos_token_id,
            )
+
         text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         return {"generated_text": text}
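For reference, a minimal sketch of how the updated handler can be exercised. It only relies on what the diff shows: __call__ takes a dict with "inputs" plus an optional "parameters" block whose max_new_tokens / temperature / top_p override the defaults. Everything else (importing handler.py locally, the example prompt and values) is an assumption for illustration, not part of this commit.

# Minimal usage sketch (assumption: handler.py is importable from the working directory)
from handler import EndpointHandler

handler = EndpointHandler(path=".")  # loads BASE_MODEL and the LoRA adapter at startup

payload = {
    "inputs": "Summarize the following notes:\n- Budget approved\n- Launch moved to Q3",
    "parameters": {                  # optional; each key falls back to the handler's default
        "max_new_tokens": 256,       # default 512
        "temperature": 0.5,          # default 0.7
        "top_p": 0.9,                # default 0.9
    },
}

result = handler(payload)            # SYSTEM_PROMPT is prepended to "inputs" inside __call__
print(result["generated_text"])

Reading the sampling settings from data["parameters"] keeps the handler compatible with the usual {"inputs": ..., "parameters": {...}} request shape used by Hugging Face Inference Endpoints, while the previous hard-coded values (512 / 0.7 / 0.9) remain as defaults when no overrides are sent.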