Spaces:
Sleeping
Sleeping
Mustehson
committed on
Commit
·
bb41ea7
1
Parent(s):
2833068
Added Langsmith
Browse files
- app.py +4 -2
- requirements.txt +6 -5
app.py
CHANGED
|
@@ -8,6 +8,8 @@ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
|
| 8 |
from langchain_community.document_loaders import RecursiveUrlLoader
|
| 9 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
from langchain_community.document_transformers import Html2TextTransformer
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
TAB_LINES = 22
|
|
@@ -52,7 +54,7 @@ def scrape_text(url, max_depth):
|
|
| 52 |
return None
|
| 53 |
return documents
|
| 54 |
|
| 55 |
-
|
| 56 |
def clean_text(docs):
|
| 57 |
html2text = Html2TextTransformer()
|
| 58 |
docs_transformed = html2text.transform_documents(docs)
|
|
@@ -93,7 +95,7 @@ def format_page_content(docs):
|
|
| 93 |
formatted_docs += "\n\n---\n\n"
|
| 94 |
return formatted_docs
|
| 95 |
|
| 96 |
-
|
| 97 |
def get_tables(raw_docs):
|
| 98 |
tables_list = []
|
| 99 |
for raw_doc in raw_docs:
|
|
|
|
| 8 |
from langchain_community.document_loaders import RecursiveUrlLoader
|
| 9 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 10 |
from langchain_community.document_transformers import Html2TextTransformer
|
| 11 |
+
from langsmith import traceable
|
| 12 |
+
|
| 13 |
|
| 14 |
|
| 15 |
TAB_LINES = 22
|
|
|
|
| 54 |
return None
|
| 55 |
return documents
|
| 56 |
|
| 57 |
+
@traceable()
|
| 58 |
def clean_text(docs):
|
| 59 |
html2text = Html2TextTransformer()
|
| 60 |
docs_transformed = html2text.transform_documents(docs)
|
|
|
|
| 95 |
formatted_docs += "\n\n---\n\n"
|
| 96 |
return formatted_docs
|
| 97 |
|
| 98 |
+
@traceable()
|
| 99 |
def get_tables(raw_docs):
|
| 100 |
tables_list = []
|
| 101 |
for raw_doc in raw_docs:
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
pandas
|
| 2 |
-
langchain
|
| 3 |
-
langchain-community
|
| 4 |
langchain-text-splitters
|
| 5 |
html2text
|
| 6 |
lxml
|
| 7 |
beautifulsoup4
|
| 8 |
html5lib
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
langchain-text-splitters
|
| 2 |
html2text
|
| 3 |
lxml
|
| 4 |
beautifulsoup4
|
| 5 |
html5lib
|
| 6 |
+
pandas==2.2.2
|
| 7 |
+
langchain==0.3.3
|
| 8 |
+
langchain-community==0.3.2
|
| 9 |
+
langsmith==0.1.135
|
| 10 |
+
duckdb==1.1.1
|
| 11 |
+
sentence_transformers==3.2.0
|