| | |
| | """ |
| | Helion-2.5-Rnd Python Client |
| | Easy-to-use client for interacting with Helion inference server |
| | """ |
| |
|
| | import json |
| | import requests |
| | from typing import Dict, Generator, List, Optional, Union |
| |
|
| |
|
class HelionClient:
    """Client for the Helion-2.5-Rnd inference API.

    Thin wrapper over the OpenAI-compatible HTTP endpoints exposed by a
    Helion inference server: chat completions (blocking and SSE
    streaming), health check, and model listing.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize Helion client.

        Args:
            base_url: Base URL of the inference server; a trailing slash
                is stripped so endpoint paths can be joined safely.
            api_key: Optional API key, sent as a Bearer token when given.
            timeout: Request timeout in seconds for generation requests.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters forwarded verbatim in the payload

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        else:
            return self._complete_chat(payload)

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion; returns the first choice's text.

        Raises:
            requests.HTTPError: on non-2xx responses.
        """
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()

        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion over server-sent events.

        Yields each non-empty content delta as it arrives. Lines that are
        not valid JSON are skipped (servers may interleave keep-alives).

        Raises:
            requests.HTTPError: on non-2xx responses.
        """
        # FIX: use the response as a context manager so the underlying
        # connection is released even when the caller abandons this
        # generator early or an exception escapes mid-stream. The
        # original left the streaming response unclosed (connection leak).
        with requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        ) as response:
            response.raise_for_status()

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                if not line.startswith('data: '):
                    continue
                data_str = line[6:]
                if data_str == '[DONE]':
                    break
                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    continue
                delta = data["choices"][0]["delta"].get("content", "")
                if delta:
                    yield delta

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a text completion request.

        Implemented as a single-turn chat with the prompt as the user
        message.

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters forwarded to :meth:`chat`

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health; returns the decoded /health JSON body."""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models (the 'data' field of /v1/models)."""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]
| |
|
| |
|
class HelionAssistant:
    """High-level conversational interface around :class:`HelionClient`.

    Keeps a running conversation history and prepends a system prompt to
    every request, so callers only supply the next user message.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize Helion assistant.

        Args:
            base_url: Base URL of inference server
            system_prompt: System prompt to use for all conversations;
                falls back to the default Helion persona when falsy.
            **client_kwargs: Additional arguments for HelionClient
        """
        default_prompt = (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or default_prompt
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant.

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset conversation history first

        Returns:
            Assistant response text, or a chunk generator when streaming
        """
        if reset_history:
            self.conversation_history = []

        # Full request: persona, prior turns, then the new user message.
        prompt_messages = (
            [{"role": "system", "content": self.system_prompt}]
            + self.conversation_history
            + [{"role": "user", "content": message}]
        )

        if stream:
            return self._stream_and_store(
                prompt_messages, temperature, max_tokens, message
            )

        reply = self.client.chat(
            messages=prompt_messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False
        )
        self._record_turn(message, reply)
        return reply

    def _record_turn(self, user_message: str, assistant_reply: str) -> None:
        """Append one completed user/assistant exchange to the history."""
        self.conversation_history.append(
            {"role": "user", "content": user_message}
        )
        self.conversation_history.append(
            {"role": "assistant", "content": assistant_reply}
        )

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Yield streamed chunks, recording the full reply once exhausted."""
        pieces: List[str] = []
        for piece in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            pieces.append(piece)
            yield piece

        # History is only updated after the stream completes.
        self._record_turn(user_message, "".join(pieces))

    def reset(self):
        """Reset conversation history"""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Return a shallow copy of the conversation history."""
        return list(self.conversation_history)
| |
|
| |
|
| | |
def example_usage():
    """Demonstrate the client and assistant against a local Helion server."""
    client = HelionClient(base_url="http://localhost:8000")

    # Confirm the server is reachable before issuing generation requests.
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Single-shot text completion.
    answer = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {answer}")

    # Multi-message chat with an explicit system prompt.
    coding_messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate fibonacci numbers"}
    ]
    answer = client.chat(messages=coding_messages, temperature=0.3)
    print(f"\nCode: {answer}")

    # Token-by-token streaming output.
    print("\nStreaming response:")
    for piece in client.complete("Tell me a short story about AI:", stream=True):
        print(piece, end='', flush=True)
    print()

    # Stateful assistant: conversation history carries across turns.
    assistant = HelionAssistant()
    answer = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {answer}")

    answer = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {answer}")
| |
|
| |
|
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    example_usage()