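# Flask application for a legal-assistant prototype: it indexes IPC sections and Miyapur
# court orders into Chroma vector stores, matches uploaded court-order PDFs to relevant
# IPC sections with GPT-4, and answers queries over the stored case law.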
import os
import json
import re

from flask import Flask, request, jsonify, render_template, render_template_string, send_from_directory

import openai
import langchain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTChar
import pdfplumber
import fitz  # PyMuPDF

import chromadb
from chromadb.config import Settings
app = Flask(__name__)

app.config['UPLOAD_FOLDER'] = 'uploads'
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)  # make sure the upload folder exists before saving files

# Read the OpenAI API key from the environment rather than hardcoding it in source.
openapi_key = os.environ.get("OPENAI_API_KEY")  # set OPENAI_API_KEY before starting the app

fpath = "./SCHOOL ADMISSI0N TEST 2025-2026.pdf"

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", " "],
)

global_var = "I am global"
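# Load Miyapur court orders from a JSON file, flatten each case record into a text block
# with case-number/year metadata, and persist the embedded documents in a Chroma store.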
def loadmiyapurcourtorders(pathtosjon):
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing, please wait---")
            courtcontent = json.load(f)

        docs = []
        for content in courtcontent:
            content1 = f"""
            Court Level: {content["court_level"]}
            Case No: {content["caseno"]}
            Year: {content["year"]}
            Prayer: {content["prayer"]}
            Verdict: {content["verdict"]}
            Judgement Date: {content["verdictdate"]}
            Status: {content["status"]}
            Case Type: {content["casetype"]}
            Petitioner: {content["petitioner"]}
            Respondent 1: {content["Respondent 1"]}
            Judge: {content["judge"]}
            """
            metadata = {
                "case no": str(content["caseno"]),
                "year": content["year"]
            }
            docs.append(Document(page_content=content1, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all Miyapur court-order documents here
        vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./MiyapurCase_db1")
    except Exception as er:
        print("--exception---", er)
        return "Exception occurred while processing the court-order JSON."
    return "Miyapur court cases are loaded successfully."
def loadIPCsections(pathtosjon):
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing, please wait---")
            ipc_sections = json.load(f)

        docs = []
        for section in ipc_sections:
            content = f"Section {section['Section']}: {section['section_title']}\n{section['section_desc']}"
            metadata = {
                "chapter": str(section["chapter"]),
                "chapter_title": section["chapter_title"],
                "section": str(section["Section"]),
                "section_title": section["section_title"]
            }
            docs.append(Document(page_content=content, metadata=metadata))

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all IPC section articles here
        vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./ChromaIPC_db")
    except Exception as er:
        print("--exception---", er)
        return "Exception occurred while processing the IPC JSON."
    return "IPC sections are loaded successfully."
def trainpdf1(fpath1):
    print("- fpath1---", fpath1)
    try:
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], fpath1.filename)
        fpath1.save(filepath)
        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

        loader = PyPDFLoader(filepath)
        pages = loader.load()  # Returns a list of Document objects
        court_text = "\n".join([page.page_content for page in pages])

        # Wrap the full order text as a single LangChain document
        doc = Document(
            page_content=court_text,
            metadata={"source": "court order"}
        )
        print("- filepath---", filepath)

        #text_splitter = RecursiveCharacterTextSplitter(
        #    chunk_size=800,
        #    chunk_overlap=200
        #)
        #documents = text_splitter.split_documents(pages)

        # Store all court-order documents here
        vectorstore = Chroma.from_documents([doc], embeddings, persist_directory="./ChromaCOURT_db")
        vectorstore.persist()

        llm = ChatOpenAI(model="gpt-4", temperature=0)
        prompt = PromptTemplate.from_template("""
        You are a legal assistant. Given the following court order, list the top 5 relevant legal issues or areas that this case involves (e.g., property rights, public nuisance, fundamental rights, illegal construction, etc.)

        Court Order:
        {order}

        List 5 legal areas:
        """)
        chain = LLMChain(llm=llm, prompt=prompt)
        response = chain.run(order=court_text)

        IPCsearch = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)

        # Strip list numbering from the model output and keep non-empty lines as legal areas
        areas = [area.strip("1234567890. ").strip() for area in response.split('\n') if area.strip()]

        ipc_matches = []
        ipc_results = []
        for area in areas[:5]:  # limit to top 5 areas
            results = IPCsearch.similarity_search(area, k=1)
            if results:
                ipc_matches.append((area, results[0]))

        # Print matched IPC sections
        for topic, doc in ipc_matches:
            print(f"\nLegal Area: {topic}")
            print(f"IPC Section: {doc.metadata.get('section')} - {doc.metadata.get('section_title')}")
            print(f"Description: {doc.page_content}")

        for topic, doc in ipc_matches:
            ipc_results.append({
                "legal_area": topic,
                "ipc_section": doc.metadata.get("section"),
                "section_title": doc.metadata.get("section_title"),
                "description": doc.page_content,
                "Orderdocumentation": response
            })
    except Exception as er:
        print("--exception---", er)
        return "This PDF cannot be trained."
    return ipc_results
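# Answer a free-form query over the stored Miyapur case database with a RetrievalQA chain
# (GPT-4 over the persisted Chroma retriever).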
def getdata(query):
    embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

    your_case_db = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)
    your_case_text = your_case_db.similarity_search("relevant IPC section for bribe", k=1)[0].page_content
    print("---your_case_text-----", your_case_text)

    # Load the court-case database (must match the persist_directory used in loadmiyapurcourtorders)
    supreme_db = Chroma(persist_directory="./MiyapurCase_db1", embedding_function=embeddings)
    retriever = supreme_db.as_retriever(search_kwargs={"k": 5})

    # Ask for relevant judgments
    llm = ChatOpenAI(model="gpt-4", temperature=1)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    response = qa_chain.run(query)
    return response


if __name__ == '__main__':
    app.run(port=8080)