"""
Helion-2.5-Rnd Python Client

Easy-to-use client for interacting with a Helion inference server.
"""

import json
from typing import Dict, Generator, List, Optional, Union

import requests


class HelionClient:
    """Client for the Helion-2.5-Rnd inference API"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize the Helion client.

        Args:
            base_url: Base URL of the inference server
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {"Content-Type": "application/json"}
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters passed through to the server

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        return self._complete_chat(payload)
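
    # A minimal usage sketch (assumes a Helion server exposing the
    # OpenAI-style /v1/chat/completions endpoint used below):
    #
    #     client = HelionClient()
    #     reply = client.chat([{"role": "user", "content": "Hello, Helion!"}])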

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()

        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        )
        response.raise_for_status()

        # The server streams Server-Sent Events: each event is a line of the
        # form "data: {json}", and the stream is terminated by "data: [DONE]".
        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data: '):
                    data_str = line[6:]  # strip the "data: " prefix
                    if data_str == '[DONE]':
                        break

                    try:
                        data = json.loads(data_str)
                        delta = data["choices"][0]["delta"].get("content", "")
                        if delta:
                            yield delta
                    except (json.JSONDecodeError, KeyError, IndexError):
                        # Skip malformed or non-content chunks
                        continue
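
    # Streaming consumption sketch:
    #
    #     for chunk in client.chat(messages, stream=True):
    #         print(chunk, end='', flush=True)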

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a text completion request.

        The prompt is wrapped in a single user message and sent through
        the chat endpoint.

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters passed through to the server

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health"""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models"""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]
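
    # Error handling: every method raises requests.HTTPError on non-2xx
    # responses (via raise_for_status), and network failures surface as
    # other requests.RequestException subclasses. A minimal sketch:
    #
    #     try:
    #         client.health_check()
    #     except requests.RequestException as exc:
    #         print(f"Helion server unreachable: {exc}")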


class HelionAssistant:
    """High-level assistant interface for Helion"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize the Helion assistant.

        Args:
            base_url: Base URL of the inference server
            system_prompt: System prompt to use for all conversations
            **client_kwargs: Additional arguments for HelionClient
        """
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant, keeping conversation history across turns.

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset conversation history first

        Returns:
            Assistant response, or a generator of text chunks when streaming
        """
        if reset_history:
            self.conversation_history = []

        # Build the full message list: system prompt, prior turns, new message
        messages = [{"role": "system", "content": self.system_prompt}]
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": message})

        if stream:
            return self._stream_and_store(messages, temperature, max_tokens, message)

        response = self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False
        )

        # Record both sides of the exchange in the history
        self.conversation_history.append({"role": "user", "content": message})
        self.conversation_history.append({"role": "assistant", "content": response})

        return response

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Stream the response and store it in history once complete"""
        full_response = ""

        for chunk in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            full_response += chunk
            yield chunk

        # Note: history is only updated after the generator has been fully
        # consumed; an abandoned stream leaves the exchange unrecorded.
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append({"role": "assistant", "content": full_response})

    def reset(self):
        """Reset conversation history"""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Get a copy of the conversation history"""
        return self.conversation_history.copy()
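
    # Multi-turn sketch (names and prompts here are illustrative only):
    #
    #     assistant = HelionAssistant(system_prompt="You are a patient tutor.")
    #     assistant.chat("What is a derivative?")
    #     assistant.chat("Show a worked example.")  # prior turn carries over
    #     print(len(assistant.get_history()))       # 4 messages so far
    #     assistant.reset()                         # start a fresh conversation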


def example_usage():
    """Example usage of the Helion client"""

    # Initialize the client
    client = HelionClient(base_url="http://localhost:8000")

    # Check server health
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Simple text completion
    response = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {response}")

    # Chat completion with a system prompt
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate Fibonacci numbers"}
    ]

    response = client.chat(messages=messages, temperature=0.3)
    print(f"\nCode: {response}")

    # Streaming completion
    print("\nStreaming response:")
    for chunk in client.complete("Tell me a short story about AI:", stream=True):
        print(chunk, end='', flush=True)
    print()

    # High-level assistant with conversation history
    assistant = HelionAssistant()
    response = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {response}")

    # Follow-up question; the previous turn is kept in history
    response = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {response}")


if __name__ == "__main__":
    example_usage()