Kartikay Khosla committed · 00bb6d2 · 1 parent: afd3717
Add SAURL.py as main entrypoint with Gemini insights
SAURL.py
CHANGED
@@ -12,6 +12,7 @@ from langdetect import detect
 import streamlit as st
 import io
 from newspaper import Article  # ✅ for URL input
+import google.generativeai as genai  # ✅ Gemini for insights
 
 # ===============================
 # 🧠 Safe SpaCy + Stanza Downloads
@@ -39,6 +40,13 @@ stanza.download('ta')
 nlp_hi = stanza.Pipeline('hi', processors='tokenize,pos', use_gpu=torch.cuda.is_available())
 nlp_ta = stanza.Pipeline('ta', processors='tokenize,pos', use_gpu=torch.cuda.is_available())
 
+# ===============================
+# Gemini setup
+# ===============================
+api_key = os.getenv("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("❌ Missing GEMINI_API_KEY. Please set it in Hugging Face secrets or locally.")
+genai.configure(api_key=api_key)
 
 # ===============================
 # Language-Aware Pipeline Loader
@@ -79,7 +87,6 @@ def load_pipelines(language_code):
 
     return emotion_pipeline, sentiment_pipeline
 
-
 # ===============================
 # DOCX Reader → keep paras separate
 # ===============================
@@ -88,7 +95,6 @@ def read_and_split_articles(file_path):
     paragraphs = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
     return paragraphs
 
-
 # ===============================
 # URL Reader → title + main body
 # ===============================
@@ -101,7 +107,6 @@ def read_article_from_url(url):
     full_text = f"{title}\n\n{body}"
     return full_text
 
-
 # ===============================
 # Filter Neutral
 # ===============================
@@ -112,7 +117,6 @@ def filter_neutral(emotion_results, neutral_threshold=0.75):
         scores.pop("neutral")
     return scores
 
-
 # ===============================
 # Sentence Splitter
 # ===============================
@@ -126,7 +130,6 @@ def split_sentences(text, lang):
         sentences = [sent.text.strip() for sent in doc.sents]
     return [s.strip() for s in sentences if s.strip()]
 
-
 # ===============================
 # POS Tagger
 # ===============================
@@ -143,16 +146,47 @@ def get_pos_tags(sentence, lang):
     else:
         return []
 
+# ===============================
+# Gemini → Generate Insight
+# ===============================
+def generate_insight(paragraph, emotions, sentiment):
+    """Use Gemini to suggest improvements with Top 3 emotions only"""
+    try:
+        top_emotions = sorted(emotions.items(), key=lambda x: x[1], reverse=True)[:3]
+        emo_text = ", ".join([f"{k}: {v}" for k, v in top_emotions])
+        sent_text = f"{sentiment['label']} ({round(sentiment['score'], 3)})" if sentiment else "N/A"
+
+        prompt = (
+            f"Here is a paragraph:\n\n{paragraph}\n\n"
+            f"Top 3 detected emotions: {emo_text}\n"
+            f"Overall sentiment: {sent_text}\n\n"
+            "👉 Please suggest how to rewrite or improve this paragraph for better clarity, "
+            "engagement, and emotional impact. Provide constructive, content-specific insights."
+        )
+
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        response = model.generate_content(prompt)
+
+        return response.text.strip() if response and response.text else "No insight generated."
+    except Exception as e:
+        return f"⚠️ Insight generation failed: {str(e)}"
+
+# ===============================
+# Normalize Scores (scale to 1)
+# ===============================
+def normalize_scores(scores: dict):
+    if not scores:
+        return scores
+    max_val = max(scores.values())
+    if max_val == 0:
+        return scores
+    return {k: round(v / max_val, 3) for k, v in scores.items()}
 
 # ===============================
 # Analysis Function
 # ===============================
 def analyze_article(article_text, lang, emotion_pipeline, sentiment_pipeline):
-    results_summary = []
     export_rows = []
-    para_counters = []
-    emotion_to_sentences = {}
-
     paragraphs = [p.strip() for p in article_text.split("\n\n") if p.strip()]
     if len(paragraphs) <= 1:
         paragraphs = [p.strip() for p in article_text.split("\n") if p.strip()]
@@ -175,7 +209,8 @@ def analyze_article(article_text, lang, emotion_pipeline, sentiment_pipeline):
         all_sentiments.append(max(sentiment_results, key=lambda x: x["score"]))
 
     if total_length > 0:
-        weighted_scores = {emo:
+        weighted_scores = {emo: val / total_length for emo, val in weighted_scores.items()}
+        weighted_scores = normalize_scores(weighted_scores)  # ✅ normalize to scale of 1
 
     overall_sentiment = max(all_sentiments, key=lambda x: x["score"]) if all_sentiments else {}
 
@@ -193,63 +228,36 @@ def analyze_article(article_text, lang, emotion_pipeline, sentiment_pipeline):
     # Paragraph-level
     for p_idx, para in enumerate(paragraphs, start=1):
         para_counter = Counter()
+        all_para_sentiments = []
         sentences = split_sentences(para, lang[:2])
         for sentence in sentences:
             results = emotion_pipeline(sentence[:512])[0]
             filtered = filter_neutral(results, neutral_threshold=0.75)
             for emo, score in filtered.items():
                 para_counter[emo] += score
-
-
-
-
+            sentiment_results = sentiment_pipeline(sentence[:512])[0]
+            all_para_sentiments.append(max(sentiment_results, key=lambda x: x["score"]))
+
+        para_emotions = dict(sorted(para_counter.items(), key=lambda x: x[1], reverse=True))
+        para_emotions = normalize_scores(para_emotions)  # ✅ normalize to scale of 1
+        para_sentiment = max(all_para_sentiments, key=lambda x: x["score"]) if all_para_sentiments else {}
 
-        para_counters.append((para, dict(sorted(para_counter.items(), key=lambda x:x[1], reverse=True))))
         st.write(f"\n📌 Paragraph {p_idx}: {para}")
-        st.write("Emotions →",
+        st.write("Emotions →", para_emotions)
+        st.write("Sentiment →", para_sentiment)
+
+        insight = generate_insight(para, para_emotions, para_sentiment)
+        st.write("💡 Insight →", insight)
 
         export_rows.append({
             "Type": "Paragraph",
             "Text": para,
-            "Emotions":
-            "Sentiment":
+            "Emotions": para_emotions,
+            "Sentiment": para_sentiment,
+            "Insight": insight
         })
 
-
-    st.subheader("🔍 SENTENCES")
-    for para in paragraphs:
-        sentences = split_sentences(para, lang[:2])
-        for sentence in sentences:
-            pos_tags = get_pos_tags(sentence, lang[:2])
-            results = emotion_pipeline(sentence[:512])[0]
-            filtered = filter_neutral(results, neutral_threshold=0.75)
-            sentiment_results = sentiment_pipeline(sentence[:512])[0]
-            best_sentiment = max(sentiment_results, key=lambda x: x["score"])
-            results_summary.append({
-                "sentence": sentence,
-                "pos_tags": pos_tags,
-                "emotions": filtered,
-                "sentiment": best_sentiment
-            })
-            st.write(f"Sentence: {sentence}")
-            st.write(f"POS Tags → {pos_tags}")
-            st.write(f"Emotions → {filtered}")
-            st.write(f"Sentiment → {best_sentiment['label']} ({round(best_sentiment['score'],4)})\n")
-
-            for emo in sorted(filtered, key=filtered.get, reverse=True)[:5]:
-                if emo not in emotion_to_sentences:
-                    emotion_to_sentences[emo] = []
-                emotion_to_sentences[emo].append(f"(Sentence) {sentence}")
-
-            export_rows.append({
-                "Type": "Sentence",
-                "Text": sentence,
-                "Emotions": filtered,
-                "Sentiment": best_sentiment
-            })
-
-    return results_summary, export_rows, emotion_to_sentences
-
+    return export_rows
 
 # ===============================
 # Streamlit App
@@ -275,9 +283,7 @@ if st.button("🔍 Analyze"):
 
     detected_lang = detect(text_to_analyze[:200]) if text_to_analyze else "en"
    emotion_pipeline, sentiment_pipeline = load_pipelines(detected_lang)
-    results_summary, export_rows, emotion_to_sentences = analyze_article(
-        text_to_analyze, detected_lang, emotion_pipeline, sentiment_pipeline
-    )
+    export_rows = analyze_article(text_to_analyze, detected_lang, emotion_pipeline, sentiment_pipeline)
 
     # ✅ Download buttons FIRST
     df_export = pd.DataFrame(export_rows)
@@ -298,16 +304,3 @@ if st.button("🔍 Analyze"):
         file_name="analysis_results.xlsx",
         mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     )
-
-    # ✅ Emotion filter tabs at the end
-    if emotion_to_sentences and len(emotion_to_sentences) > 0:
-        st.subheader("🔍 Explore by Emotion (Top 5 only)")
-        emotion_list = list(emotion_to_sentences.keys())
-        tabs = st.tabs(emotion_list)
-        for idx, emo in enumerate(emotion_list):
-            with tabs[idx]:
-                st.write(f"### 🔹 {emo.upper()}")
-                for text in emotion_to_sentences[emo]:
-                    st.write(f"- {text}")
-    else:
-        st.info("No emotions strong enough to show in Top 5 filters.")
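A quick way to sanity-check the new helpers outside Streamlit is sketched below. This snippet is not part of the commit: it assumes the google-generativeai package is installed and GEMINI_API_KEY is exported (the same variable the new setup block checks), and the sample paragraph and raw emotion scores are invented for illustration.

# Minimal smoke test for the committed helpers (illustrative only).
import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GEMINI_API_KEY"])  # same env var SAURL.py requires

def normalize_scores(scores: dict):
    # Same scaling as the committed helper: divide by the max score.
    if not scores:
        return scores
    max_val = max(scores.values())
    if max_val == 0:
        return scores
    return {k: round(v / max_val, 3) for k, v in scores.items()}

# Made-up raw scores; normalization yields relative weights, not probabilities.
emotions = normalize_scores({"joy": 2.4, "anger": 1.2, "fear": 0.6})
print(emotions)  # {'joy': 1.0, 'anger': 0.5, 'fear': 0.25}

# Same model and call pattern as generate_insight().
model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content(
    "Here is a paragraph:\n\nThe launch went better than anyone expected.\n\n"
    f"Top 3 detected emotions: {emotions}\n"
    "Suggest how to rewrite this paragraph for clarity and emotional impact."
)
print(response.text.strip() if response and response.text else "No insight generated.")

Because normalize_scores divides every score by the maximum, the Emotions values in the Streamlit output and the CSV/XLSX export are relative weights on a 0-1 scale (the top emotion is pinned at 1.0), not summed model probabilities.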