Spaces:
Runtime error
Runtime error
import html

import networkx as nx
import numpy as np
import pandas as pd
import streamlit as st
from datasets import load_dataset
# Page header: load the stories, introduce the app, show one sample story
# and let the user pick the similarity threshold used to build the graph.


@st.cache_resource
def _load_stories():
    """Load the TinyStories dataset once per server process.

    Streamlit re-executes the whole script on every widget interaction
    (e.g. each slider move); caching avoids reloading the dataset each time.
    """
    return load_dataset("roneneldan/TinyStories")


dataset = _load_stories()

st.markdown('# Short Stories, networks and connections')
st.markdown(
    'In this example we consider the semantic similarity between short '
    'stories generated by GenAI.'
)
st.markdown(
    'We study the relationships between the stories using a network. The '
    'laplacian connectivity provides insights about the closeness of the graph'
)

st.markdown('## Short Stories')
# The link needs an absolute URL; a bare "roneneldan/TinyStories" target is
# resolved relative to the app's own address and leads nowhere.
st.markdown(
    'We are using a sample of the '
    '[TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories) '
    'dataset from roneneldan work'
)

# Show one story as an example, flattened onto a single line.
text_text = dataset['train'][10]['text']
# The story is external data rendered with unsafe_allow_html=True, so escape
# &, < and > to keep it from being interpreted as markup.
st.markdown(
    "<span style='color:red'>"
    + html.escape(text_text.replace('\n', ' '), quote=False)
    + "</span>",
    unsafe_allow_html=True,
)

st.markdown(
    'The threshold changes the level of connectivity in the network. '
    'The range is from 0 (less similar) to 1 (completely similar)'
)
# Two stories are linked when their cosine similarity exceeds this value.
threshhold = st.slider('Threshold', 0.0, 1.0, step=0.1)
| #------------------------------------------------------------- | |
| #------------------------------------------------------------- | |
| from sentence_transformers import SentenceTransformer, util | |
# Embed the first ten stories, compute their pairwise cosine similarity and
# turn the thresholded similarity matrix into an undirected graph G.


@st.cache_resource
def _load_encoder():
    """Instantiate the sentence-embedding model once per server process.

    Streamlit re-runs the script on every interaction; without caching the
    model would be constructed again on each slider move.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


model = _load_encoder()

# The first ten training rows. Each row is a dict with a 'text' field; the
# rows (not just the strings) are kept because later code reads
# sentences[i]['text'].
sentences = [dataset['train'][ii] for ii in range(10)]

# Encode the story strings themselves rather than the row dicts.
story_texts = [row['text'] for row in sentences]
embeddings = model.encode(story_texts, convert_to_tensor=True)

# Pairwise cosine similarity of every story with every story.
cosine_scores = util.cos_sim(embeddings, embeddings)

# Move the tensor to host memory before converting: .numpy() raises on a
# tensor living on an accelerator, while .cpu() is a no-op on CPU tensors.
similarity = cosine_scores.cpu().numpy()

# Boolean adjacency matrix: an edge wherever similarity exceeds the threshold.
# NOTE: the diagonal (each story compared with itself) is thresholded too, so
# a node whose self-similarity exceeds the threshold carries a self-loop.
G = nx.from_numpy_array(similarity > threshhold)

st.markdown(
    'We can visualize the similarity between the short stories as a network. '
    'If the similarity is greater than the threshold, the two nodes are connected'
)
| #------------------------------------------------------------- | |
| #------------------------------------------------------------- | |
# ego_graph.py
# An example of how to plot a node's ego network
# (egonet). This indirectly showcases slightly more involved
# interoperability between streamlit-agraph and networkx.
# An egonet can be created from (almost) any network (graph),
# and exemplifies the concept of a subnetwork (subgraph):
# A node's egonet is the (sub)network comprised of the focal node
# and all the nodes to which it is adjacent. The edges included
# in the egonet are those whose endpoints are both among the
# aforementioned nodes.
# Use the following command to launch the app:
# streamlit run <path-to-script>.py
| # standard library dependencies | |
| from operator import itemgetter | |
| # external dependencies | |
| import networkx as nx | |
| from streamlit_agraph import agraph, Node, Edge, Config | |
# Build the ego network of the best-connected story and convert it into the
# Node/Edge lists that streamlit-agraph renders.

# (node, degree) pair of the node with the largest degree in G; on ties the
# first such node in G's iteration order wins.
most_connected_node = max(G.degree, key=itemgetter(1))

# Ego network of that node: the node itself, its neighbours, and every edge
# of G that runs between those nodes.
hub_ego = nx.ego_graph(G, most_connected_node[0])

# One agraph node per story; the full story text is attached as the title.
nodes = [
    Node(title=str(sentences[i]['text']), id=i, label='node_' + str(i), size=20)
    for i in hub_ego.nodes
]

# hub_ego already holds exactly the edges of G whose endpoints are both in the
# ego network. G may carry self-loops (the thresholded similarity matrix
# includes its diagonal); a story being similar to itself is not a connection
# worth drawing, so those are skipped.
edges = [
    Edge(source=i, target=j, type="CURVE_SMOOTH")
    for i, j in hub_ego.edges
    if i != j
]
# Render the ego network with streamlit-agraph.
config = Config(
    width=500,
    height=500,
    # G is built by nx.from_numpy_array without a create_using argument, so it
    # is an undirected graph and similarity is symmetric; arrowheads would
    # suggest a direction that does not exist.
    directed=False,
    nodeHighlightBehavior=False,
    highlightColor="#F7A7A6",  # or "blue"
    collapsible=False,
    node={'labelProperty': 'label'},
    # **kwargs e.g. node_size=1000 or node_color="blue"
)

# agraph draws the component and returns its value, which is kept here
# but not used further in this script.
return_value = agraph(nodes=nodes, edges=edges, config=config)
# Compute the Laplacian centrality of every node in G, print the raw values
# and plot them as a bar chart.
st.markdown('The Laplacian centrality is a measure of closeness')

# The normalized centrality divides by the graph's total Laplacian energy,
# which is zero when no two distinct nodes are connected (self-loops do not
# contribute to the Laplacian). Depending on the networkx version that case
# raises ZeroDivisionError or yields NaN, so report zeros instead.
if any(u != v for u, v in G.edges):
    # Computed once and reused for both the text dump and the chart.
    d_lc = nx.laplacian_centrality(G)
else:
    d_lc = dict.fromkeys(G, 0.0)

st.write(str(d_lc))

# Build the frame column-wise so node ids keep their integer dtype instead of
# being coerced to float by stacking them with the centrality values.
df_lc = pd.DataFrame(
    {
        'node': list(d_lc),
        'laplacian_centrality': list(d_lc.values()),
    }
)
st.bar_chart(df_lc, x='node', y='laplacian_centrality')