"""Util that calls Wikipedia.

Reference: https://github.com/hwchase17/langchain/blob/9b615022e2b6a3591347ad77a3e21aad6cf24c49/docs/extras/modules/agents/tools/integrations/wikipedia.ipynb#L36
"""
| import logging |
| from typing import Any, Dict, List, Optional |
|
|
| from pydantic import BaseModel, root_validator |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Maximum number of query characters forwarded to the Wikipedia search;
# longer queries are truncated to this length before searching.
WIKIPEDIA_MAX_QUERY_LENGTH = 300
|
|
|
|
class WikipediaAPIWrapper(BaseModel):
    """Wrapper around the ``wikipedia`` Python package.

    To use, you should have the ``wikipedia`` python package installed.
    This wrapper uses the Wikipedia API to conduct searches and fetch page
    summaries. By default, it returns the page summaries of the top-k
    results. The returned text is limited to ``doc_content_chars_max``
    characters.

    :param wiki_client: The Wikipedia API client. It is always set by
        :meth:`validate_environment` to the imported ``wikipedia`` module.
    :type wiki_client: Any
    :param top_k_results: The number of results to return.
    :type top_k_results: int
    :param lang: The language to use for the Wikipedia API.
    :type lang: str
    :param doc_content_chars_max: The maximum number of characters in the
        Document content.
    :type doc_content_chars_max: int
    """

    wiki_client: Any
    top_k_results: int = 5
    lang: str = "en"
    doc_content_chars_max: int = 4000

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        Imports ``wikipedia``, sets its language and stores the module in
        ``values["wiki_client"]``.

        :param values: The raw values passed to the model constructor.
        :type values: Dict
        :return: The values, with ``wiki_client`` populated.
        :rtype: Dict
        :raises ImportError: If the package is not installed.
        """
        # Keep the ``try`` limited to the import so that only a missing
        # package is reported as "not installed".
        try:
            import wikipedia
        except ImportError as exc:
            raise ImportError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from exc

        # This validator runs with ``pre=True``, i.e. before field defaults
        # are applied, so ``lang`` is absent unless the caller passed it.
        # Fall back to the same default as the ``lang`` field instead of
        # raising ``KeyError``.
        # NOTE(review): the language is set on the ``wikipedia`` module
        # itself, so it is presumably shared by every wrapper in the process.
        wikipedia.set_lang(values.get("lang", "en"))
        values["wiki_client"] = wikipedia
        return values

    def run(self, query: str) -> str:
        """Run Wikipedia search and get page summaries.

        :param query: The query to search for.
        :type query: str
        :return: The formatted page summaries joined by blank lines and
            truncated to ``doc_content_chars_max`` characters, or a fixed
            "not found" message when no page could be loaded.
        :rtype: str
        """
        summaries = []
        for page_title in self.search_page_titles(query):
            # The summary formatter needs the page *object* (it reads
            # ``.summary``), not the content string that ``_fetch_page``
            # returns.
            wiki_page = self._load_page(page_title)
            if wiki_page is None:
                continue
            if summary := self._formatted_page_summary(page_title, wiki_page):
                summaries.append(summary)
        if not summaries:
            return "No good Wikipedia Search Result was found"
        return "\n\n".join(summaries)[: self.doc_content_chars_max]

    def _page_lookup_errors(self) -> tuple:
        """Return the client exceptions that mean "no usable page".

        :return: The ``PageError`` and ``DisambiguationError`` classes of
            the Wikipedia client.
        :rtype: tuple
        """
        exceptions = self.wiki_client.exceptions
        return (exceptions.PageError, exceptions.DisambiguationError)

    def _load_page(self, title: str) -> Optional[Any]:
        """Load a Wikipedia page object by exact title.

        :param title: The title of the page to load.
        :type title: str
        :return: The page object, or ``None`` if the page does not exist or
            the title is ambiguous.
        :rtype: Optional[Any]
        """
        try:
            return self.wiki_client.page(title=title, auto_suggest=False)
        except self._page_lookup_errors():
            return None

    def _fetch_page(self, page: str) -> Optional[str]:
        """Fetch page content from Wikipedia.

        :param page: The title of the page to fetch.
        :type page: str
        :return: The page content truncated to ``doc_content_chars_max``
            characters, or ``None`` if the page does not exist or the title
            is ambiguous.
        :rtype: Optional[str]
        """
        try:
            wiki_page = self.wiki_client.page(title=page, auto_suggest=False)
            return wiki_page.content[: self.doc_content_chars_max]
        except self._page_lookup_errors():
            return None

    def search_page_titles(self, query: str) -> List[str]:
        """Run Wikipedia search and get the titles of the matching pages.

        The query is truncated to ``WIKIPEDIA_MAX_QUERY_LENGTH`` characters
        before searching, and at most ``top_k_results`` titles are returned.

        :param query: The query to search for.
        :type query: str
        :return: The page titles.
        :rtype: List[str]
        """
        truncated_query = query[:WIKIPEDIA_MAX_QUERY_LENGTH]
        return self.wiki_client.search(truncated_query)[: self.top_k_results]

    @staticmethod
    def _formatted_page_summary(page_title: str, wiki_page: Any) -> Optional[str]:
        """Format the page title and summary in a single string.

        :param page_title: The page title.
        :type page_title: str
        :param wiki_page: The Wikipedia page object; its ``summary``
            attribute is read.
        :type wiki_page: Any
        :return: The formatted page summary.
        :rtype: Optional[str]
        """
        return f"Page: {page_title}\nSummary: {wiki_page.summary}"
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |