vijayksagi committed on
Commit
068e0bd
·
verified ·
1 Parent(s): e039d40

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +45 -0
  2. checkvectordb.py +236 -0
  3. mydocker.dockerfile +47 -0
  4. requirements.txt +150 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask,render_template,request,jsonify
2
+ from flask_cors import CORS
3
+ from checkvectordb import getdata,trainpdf1,loadIPCsections,loadmiyapurcourtorders
4
+ app = Flask(__name__)
5
+ CORS(app)
6
+
7
@app.get("/")
def index_get():
    """Serve the landing page by rendering the ``base.html`` template."""
    print("just started...")
    # Manual one-off seeding calls, intentionally left disabled:
    #   response = loadIPCsections("./uploads/ipc.json")
    #   response = loadmiyapurcourtorders("./uploads/court_history.json")
    #   print("------", response)
    page = render_template("base.html")
    return page
15
+
16
+
17
@app.post("/trainpdf")
def trainpdf():
    """Accept an uploaded PDF and return the IPC matches computed for it.

    Expects a multipart form upload under the field name ``file``.

    Returns:
        On success, whatever ``trainpdf1`` returns (unchanged from the
        previous behaviour, so existing clients keep working).
        On failure, a JSON object ``{"error": ...}`` with status 400 when no
        file was supplied, or 500 when processing raised.
    """
    pdf_file = request.files.get("file")
    if pdf_file is None or not pdf_file.filename:
        return jsonify({"error": "no PDF uploaded under form field 'file'"}), 400

    try:
        response = trainpdf1(pdf_file)
    except Exception as exc:
        # The old handler read `response` here before it was ever assigned,
        # which turned every failure into an UnboundLocalError.
        print("-error -", exc)
        return jsonify({"error": "failed to process the uploaded PDF"}), 500

    print("response :", response)
    return response
30
+
31
+
32
+
33
@app.post("/predict")
def predict():
    """Answer a chat question posted as JSON ``{"message": "<question>"}``.

    Returns:
        ``{"answer": <text from getdata>}`` on success, or
        ``{"error": ...}`` with status 400 when the body is not a JSON object
        carrying a non-empty ``message`` string.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so all malformed requests funnel into the single 400 below.
    payload = request.get_json(silent=True)
    text = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(text, str) or not text.strip():
        return jsonify(
            {"error": "request body must be JSON with a non-empty 'message' string"}
        ), 400

    print("from web ", text)
    response = getdata(text)
    message = {"answer": response}
    print("message ", message)
    return message
42
+
43
if __name__ == "__main__":
    # Listen on every interface, port 5000 (the port the Dockerfile exposes).
    app.run(port=5000, host="0.0.0.0")
45
+
checkvectordb.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Flask, request,render_template, send_from_directory
3
+
4
+ from flask import Flask, request, jsonify, render_template_string
5
+ import openai
6
+ import langchain
7
+ import os
8
+ from langchain.document_loaders import PyPDFLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.embeddings.openai import OpenAIEmbeddings
11
+ from langchain.vectorstores import Chroma
12
+ from langchain import OpenAI
13
+ from langchain.schema import Document
14
+ from langchain.chains.question_answering import load_qa_chain
15
+ from langchain.chains import RetrievalQA
16
+ from langchain.chat_models import ChatOpenAI
17
+ import re
18
+ from pdfminer.high_level import extract_pages
19
+ from pdfminer.layout import LTTextContainer, LTChar
20
+ from langchain.prompts import PromptTemplate
21
+ from langchain.chains import LLMChain
22
+
23
+ import json
24
+
25
+ import pdfplumber
26
+ import fitz # PyMuPDF
27
+
28
+
29
+ import chromadb
30
+ from chromadb.config import Settings
31
+
32
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'

# SECURITY: an OpenAI API key used to be hard-coded here (twice) and was
# published with this commit. That key must be treated as compromised and
# revoked. The key is now taken from the OPENAI_API_KEY environment variable,
# which is also where the OpenAI client libraries look for it by default.
openapi_key = os.environ.get("OPENAI_API_KEY", "")
if not openapi_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI calls will fail.")

fpath = "./SCHOOL ADMISSI0N TEST 2025-2026.pdf"
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=50,
    length_function=len,
    separators=["\n\n", "\n", " "],
)
global_var = "I am global"
40
+
41
+
42
def _format_court_case(content):
    """Render one court-order record as labelled ``Label: value`` lines."""
    fields = (
        ("Court Level", "court_level"),
        ("Case No", "caseno"),
        ("Year", "year"),
        ("Prayer", "prayer"),
        ("Verdict", "verdict"),
        ("Judgement Date", "verdictdate"),
        ("Status", "status"),
        ("Case Type", "casetype"),
        ("petitioner", "petitioner"),
        ("respondent 1", "Respondent 1"),
        ("judge", "judge"),
    )
    return "\n".join(f"{label}: {content[key]}" for label, key in fields)


def loadmiyapurcourtorders(pathtosjon, persist_directory="./MiyapurCase_db1"):
    """Embed court-order records from a JSON file into a Chroma store.

    Args:
        pathtosjon: Path to a UTF-8 JSON file holding a list of records. Each
            record must provide every key read by ``_format_court_case``.
        persist_directory: Directory of the Chroma store to write. Defaults
            to the location this loader has always used.

    Returns:
        A status string: the success message, or the failure message when
        anything goes wrong (unreadable file, missing key, embedding error).
        The failure path now returns a plain string too; it used to call
        ``jsonify``, which requires an active Flask application context.
    """
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            courtcontent = json.load(f)

        docs = [
            Document(
                page_content=_format_court_case(content),
                metadata={
                    "case no": str(content["caseno"]),
                    "year": content["year"],
                },
            )
            for content in courtcontent
        ]

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all court-order documents here.
        Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    except Exception as er:
        # Deliberately broad: this is a best-effort loader that reports
        # failure through its return value instead of raising.
        print("--exception---", er)
        return "exception occured while processing..."

    return "Miyapur court cases are loaded sucessfully"
87
+
88
+
89
+
90
def loadIPCsections(pathtosjon, persist_directory="./ChromaIPC_db"):
    """Embed IPC section records from a JSON file into a Chroma store.

    Args:
        pathtosjon: Path to a UTF-8 JSON file holding a list of records with
            the keys ``Section``, ``section_title``, ``section_desc``,
            ``chapter`` and ``chapter_title``.
        persist_directory: Directory of the Chroma store to write. Defaults
            to the location this loader has always used.

    Returns:
        A status string: the success message, or the failure message when
        anything goes wrong (unreadable file, missing key, embedding error).
        The failure path now returns a plain string too; it used to call
        ``jsonify``, which requires an active Flask application context.
    """
    try:
        with open(pathtosjon, "r", encoding="utf-8") as f:
            print("--processing pls wait---")
            ipc_sections = json.load(f)

        docs = [
            Document(
                page_content=(
                    f"Section {section['Section']}: {section['section_title']}"
                    f"\n{section['section_desc']}"
                ),
                metadata={
                    "chapter": str(section["chapter"]),
                    "chapter_title": section["chapter_title"],
                    "section": str(section["Section"]),
                    "section_title": section["section_title"],
                },
            )
            for section in ipc_sections
        ]

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        # Store all IPC section articles here.
        Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    except Exception as er:
        # Deliberately broad: this is a best-effort loader that reports
        # failure through its return value instead of raising.
        print("--exception---", er)
        return "expcetion occured while processing..."

    return "IPC sections are loaded sucessfully"
117
+
118
def trainpdf1(fpath1):
    """Index an uploaded court-order PDF and match it to IPC sections.

    Steps: save the upload, extract its text, store it in the
    ``./ChromaCOURT_db`` Chroma store, ask the chat model for the top legal
    areas of the order, then look up the closest IPC section for each area
    in ``./ChromaIPC_db``.

    Args:
        fpath1: The uploaded file object; must provide ``.filename`` and
            ``.save(path)``.

    Returns:
        A list of dicts with the keys ``legal_area``, ``ipc_section``,
        ``section_title``, ``description`` and ``Orderdocumentation``.
        On any failure, the result of ``jsonify("This pdf cannot be trained")``.
    """
    print("- fpath1---", fpath1)
    try:
        # The filename is client-controlled: keep only its last path
        # component so a name like "../../x" cannot escape the upload folder.
        raw_name = (fpath1.filename or "").replace("\\", "/")
        safe_name = os.path.basename(raw_name)
        if safe_name in ("", ".", ".."):
            raise ValueError("uploaded file has no usable filename")

        upload_dir = app.config['UPLOAD_FOLDER']
        os.makedirs(upload_dir, exist_ok=True)  # save() fails if the folder is missing
        filepath = os.path.join(upload_dir, safe_name)
        fpath1.save(filepath)

        embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
        pages = PyPDFLoader(filepath).load()  # list of Document objects, one per page
        court_text = "\n".join([page.page_content for page in pages])

        # The whole order is stored as a single LangChain document.
        order_doc = Document(
            page_content=court_text,
            metadata={"source": "court order"},
        )
        print("- filepath---", filepath)

        # Store all court order documents here.
        vectorstore = Chroma.from_documents(
            [order_doc], embeddings, persist_directory="./ChromaCOURT_db"
        )
        vectorstore.persist()

        llm = ChatOpenAI(model="gpt-4", temperature=0)
        prompt = PromptTemplate.from_template(
            "\n"
            "You are a legal assistant. Given the following court order, list the top 5 "
            "relevant legal issues or areas that this case involves (e.g., property rights, "
            "public nuisance, fundamental rights, illegal construction, etc.)\n"
            "\n"
            "Court Order:\n"
            "{order}\n"
            "\n"
            "List 5 legal areas:\n"
        )
        chain = LLMChain(llm=llm, prompt=prompt)
        response = chain.run(order=court_text)

        IPCsearch = Chroma(persist_directory="./ChromaIPC_db", embedding_function=embeddings)

        # One area per non-empty line, with list numbering ("1. ") stripped.
        areas = [
            area.strip("1234567890. ").strip()
            for area in response.split('\n')
            if area.strip()
        ]

        ipc_results = []
        for area in areas[:5]:  # limit to top 5 areas
            results = IPCsearch.similarity_search(area, k=1)
            if not results:
                continue
            match = results[0]
            section = match.metadata.get("section")
            section_title = match.metadata.get("section_title")

            print(f"\n📘 Legal Area: {area}")
            print(f"🔗 IPC Section: {section} - {section_title}")
            print(f"📄 Description: {match.page_content}")

            ipc_results.append({
                "legal_area": area,
                "ipc_section": section,
                "section_title": section_title,
                "description": match.page_content,
                "Orderdocumentation": response,
            })
    except Exception as er:
        print("--exception---", er)
        return jsonify("This pdf cannot be trained")

    return ipc_results
199
+
200
def getdata(query, case_db_dir="./MiyapurCase_db"):
    """Answer ``query`` with a RetrievalQA chain over the court-case store.

    Args:
        query: The user's question.
        case_db_dir: Directory of the Chroma store to retrieve from. The
            default is the path this function has always read.

    Returns:
        The answer text produced by the chain.
    """
    # ChatOpenAI below is built without an explicit key, so it relies on
    # this environment variable.
    os.environ['OPENAI_API_KEY'] = openapi_key
    embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)

    # Removed: a debug-only similarity search against ./ChromaIPC_db for the
    # fixed text "relavant IPC section for bribe". Its result was only
    # printed, it cost an extra embedding call per request, and its `[0]`
    # raised IndexError whenever that store was empty.

    # NOTE(review): loadmiyapurcourtorders persists to "./MiyapurCase_db1" by
    # default, while this reads "./MiyapurCase_db" - confirm which store is
    # the intended one and align the two.
    supreme_db = Chroma(persist_directory=case_db_dir, embedding_function=embeddings)
    retriever = supreme_db.as_retriever(search_kwargs={"k": 5})

    # Ask for relevant judgments.
    llm = ChatOpenAI(model="gpt-4", temperature=1)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    return qa_chain.run(query)
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
if __name__ == "__main__":
    # Start this module's own Flask app on port 8080 when run directly.
    app.run(port=8080)
mydocker.dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Single base image. The earlier "FROM python 3.10" line was invalid syntax
# (an image tag needs a colon) and would only have been an unused stage;
# Python 3.10 comes from the conda environment created below.
FROM continuumio/miniconda3

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

# System packages and Node.js 18 (for React). These steps need root, so they
# run before the switch to the unprivileged user.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git curl ca-certificates && \
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
    apt-get install -y --no-install-recommends nodejs && \
    rm -rf /var/lib/apt/lists/*

# Create the conda env with Python 3.10 and install backend dependencies.
# requirements.txt is copied on its own so this layer is reused when only
# application code changes.
COPY ./requirements.txt requirements.txt
RUN conda create -n myenv python=3.10 -y && \
    conda run -n myenv pip install --upgrade pip && \
    conda run -n myenv pip install -r requirements.txt

# Clone and build the React frontend from GitHub
RUN git clone https://github.com/vijaysagi12/myinzackBot.git /frontend && \
    cd /frontend && npm install && npm run build

# Copy backend files
COPY . /app

# Run as an unprivileged user. -m creates a home directory; the app writes
# uploads and Chroma stores under /app, so that user must own it.
RUN useradd -m user && chown -R user:user /app /frontend
USER user

# Expose Flask and React ports
EXPOSE 5000 3000

# Start both Flask and React servers
CMD ["/bin/bash", "-c", "source activate myenv && (cd /frontend && npm start &) && python app.py"]
45
+
46
+
47
+
requirements.txt ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.6.1
2
+ aiohttp==3.12.0
3
+ aiosignal==1.3.2
4
+ annotated-types==0.7.0
5
+ anyio==4.9.0
6
+ asgiref==3.8.1
7
+ async-timeout==4.0.3
8
+ attrs==25.3.0
9
+ backoff==2.2.1
10
+ bcrypt==4.3.0
11
+ blinker==1.9.0
12
+ build==1.2.2.post1
13
+ cachetools==5.5.2
14
+ certifi==2025.4.26
15
+ cffi==1.17.1
16
+ charset-normalizer==3.4.2
17
+ chromadb==1.0.10
18
+ click==8.1.8
19
+ colorama==0.4.6
20
+ coloredlogs==15.0.1
21
+ contourpy==1.3.2
22
+ cryptography==45.0.4
23
+ cycler==0.12.1
24
+ dataclasses-json==0.6.7
25
+ Deprecated==1.2.18
26
+ distro==1.9.0
27
+ durationpy==0.10
28
+ exceptiongroup==1.3.0
29
+ fastapi==0.115.9
30
+ filelock==3.18.0
31
+ Flask==3.1.1
32
+ flask-cors==6.0.0
33
+ flatbuffers==25.2.10
34
+ fonttools==4.58.4
35
+ frozenlist==1.6.0
36
+ fsspec==2025.5.1
37
+ google-auth==2.40.2
38
+ googleapis-common-protos==1.70.0
39
+ greenlet==3.2.2
40
+ grpcio==1.71.0
41
+ h11==0.16.0
42
+ httpcore==1.0.9
43
+ httptools==0.6.4
44
+ httpx==0.28.1
45
+ httpx-sse==0.4.0
46
+ huggingface-hub==0.32.1
47
+ humanfriendly==10.0
48
+ idna==3.10
49
+ importlib_metadata==8.6.1
50
+ importlib_resources==6.5.2
51
+ itsdangerous==2.2.0
52
+ Jinja2==3.1.6
53
+ jiter==0.10.0
54
+ joblib==1.5.1
55
+ jsonpatch==1.33
56
+ jsonpointer==3.0.0
57
+ jsonschema==4.23.0
58
+ jsonschema-specifications==2025.4.1
59
+ kiwisolver==1.4.8
60
+ kubernetes==32.0.1
61
+ langchain==0.3.25
62
+ langchain-community==0.3.24
63
+ langchain-core==0.3.61
64
+ langchain-text-splitters==0.3.8
65
+ langsmith==0.3.42
66
+ markdown-it-py==3.0.0
67
+ MarkupSafe==3.0.2
68
+ marshmallow==3.26.1
69
+ mdurl==0.1.2
70
+ mmh3==5.1.0
71
+ mpmath==1.3.0
72
+ multidict==6.4.4
73
+ mypy_extensions==1.1.0
74
+ numpy==2.2.6
75
+ oauthlib==3.2.2
76
+ onnxruntime==1.22.0
77
+ openai==1.82.0
78
+ opentelemetry-api==1.33.1
79
+ opentelemetry-exporter-otlp-proto-common==1.33.1
80
+ opentelemetry-exporter-otlp-proto-grpc==1.33.1
81
+ opentelemetry-instrumentation==0.54b1
82
+ opentelemetry-instrumentation-asgi==0.54b1
83
+ opentelemetry-instrumentation-fastapi==0.54b1
84
+ opentelemetry-proto==1.33.1
85
+ opentelemetry-sdk==1.33.1
86
+ opentelemetry-semantic-conventions==0.54b1
87
+ opentelemetry-util-http==0.54b1
88
+ orjson==3.10.18
89
+ overrides==7.7.0
90
+ packaging==24.2
91
+ pdfminer.six==20250506
92
+ pdfplumber==0.11.7
93
+ pillow==11.2.1
94
+ posthog==4.2.0
95
+ propcache==0.3.1
96
+ protobuf==5.29.4
97
+ pyasn1==0.6.1
98
+ pyasn1_modules==0.4.2
99
+ pycparser==2.22
100
+ pydantic==2.11.5
101
+ pydantic-settings==2.9.1
102
+ pydantic_core==2.33.2
103
+ Pygments==2.19.1
104
+ PyMuPDF==1.26.1
105
+ pyparsing==3.2.3
106
+ pypdf==5.5.0
107
+ PyPDF2==3.0.1
108
+ pypdfium2==4.30.1
109
+ PyPika==0.48.9
110
+ pyproject_hooks==1.2.0
111
+ pyreadline3==3.5.4
112
+ python-dateutil==2.9.0.post0
113
+ python-dotenv==1.1.0
114
+ PyYAML==6.0.2
115
+ referencing==0.36.2
116
+ regex==2024.11.6
117
+ requests==2.32.3
118
+ requests-oauthlib==2.0.0
119
+ requests-toolbelt==1.0.0
120
+ rich==14.0.0
121
+ rpds-py==0.25.1
122
+ rsa==4.9.1
123
+ scikit-learn==1.7.0
124
+ scipy==1.15.3
125
+ shellingham==1.5.4
126
+ six==1.17.0
127
+ sniffio==1.3.1
128
+ SQLAlchemy==2.0.41
129
+ starlette==0.45.3
130
+ sympy==1.14.0
131
+ tenacity==9.1.2
132
+ threadpoolctl==3.6.0
133
+ tiktoken==0.9.0
134
+ tokenizers==0.21.1
135
+ tomli==2.2.1
136
+ tqdm==4.67.1
137
+ typer==0.15.4
138
+ typing-inspect==0.9.0
139
+ typing-inspection==0.4.1
140
+ typing_extensions==4.13.2
141
+ urllib3==2.4.0
142
+ uvicorn==0.34.2
143
+ watchfiles==1.0.5
144
+ websocket-client==1.8.0
145
+ websockets==15.0.1
146
+ Werkzeug==3.1.3
147
+ wrapt==1.17.2
148
+ yarl==1.20.0
149
+ zipp==3.21.0
150
+ zstandard==0.23.0