mohsenfayyaz committed on
Commit
826bc35
·
verified ·
1 Parent(s): d85f9ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -29
app.py CHANGED
@@ -3,9 +3,11 @@ import gradio as gr
3
  import pandas as pd
4
  import datetime
5
  import plotly.express as px
 
6
  import datasets
7
 
8
 
 
9
  def split_multi_users(dfs):
10
  df = dfs.copy()
11
  df["usernames"] = df["username"].apply(lambda x: x.split(", "))
@@ -21,9 +23,9 @@ def split_multi_users(dfs):
21
  df = pd.DataFrame(new_df)
22
  return df
23
 
24
- def plot_now():
25
  ### Load Data
26
- dfs = datasets.load_dataset("pluslab/PLUS_Lab_GPUs_Data", download_mode='force_redownload')["train"].to_pandas()
27
  dfs = dfs.drop(columns=["Unnamed: 0"])
28
  dfs = dfs.fillna("FREE")
29
  dfs_plot = split_multi_users(dfs)
@@ -43,35 +45,194 @@ def plot_now():
43
  # print(dfs_plot)
44
  return fig, dfs
45
 
46
def plot_history(sample=True, sampling_interval_minutes=180):
    """Build the stacked-area chart of per-user GPU counts over time.

    Reads the pickled history frame, replaces missing values with "FREE",
    expands multi-user rows via ``split_multi_users`` and sums ``count`` per
    (``polling_timestamp``, ``username``) pair, ordered newest first.

    Args:
        sample: When true, thin the timestamps so that each kept timestamp is
            more than ``sampling_interval_minutes`` older than the previously
            kept one (the newest timestamp is always kept).
        sampling_interval_minutes: Minimum gap, in minutes, between two kept
            timestamps.

    Returns:
        A ``(fig, history)`` tuple: the Plotly Express area figure and the
        aggregated (and possibly thinned) DataFrame it was drawn from.
    """
    # NOTE(review): read_pickle unpickles remote content, which can execute
    # arbitrary code; this is only acceptable if the Space file is trusted.
    history = pd.read_pickle("hf://spaces/pluslab/PLUS_Lab_GPUs/history.pkl.gz")
    history = split_multi_users(history.fillna("FREE"))
    history = (
        history[["polling_timestamp", "username", "count"]]
        .groupby(["polling_timestamp", "username"])
        .sum()
        .reset_index()
        .sort_values(by=["polling_timestamp", "count"], ascending=False)
    )

    if sample:
        # Timestamps arrive newest-first because of the descending sort above.
        timestamps = history["polling_timestamp"].unique()
        kept = [timestamps[0]]
        for stamp in timestamps[1:]:
            if kept[-1] - stamp > datetime.timedelta(minutes=sampling_interval_minutes):
                kept.append(stamp)
        history = history[history["polling_timestamp"].isin(kept)]

    fig = px.area(
        history,
        x="polling_timestamp",
        y="count",
        color="username",
        color_discrete_map={"FREE": "black"},
        markers=True,
        line_shape="spline",
    )
    return fig, history
64
 
 
 
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
def plot_figs():
    """Produce the outputs shown by the interface: GPU plot, GPU table, history plot.

    The history plot is best-effort: if ``plot_history`` raises, the error is
    printed and ``None`` is returned in its place so the GPU outputs still
    render.
    """
    fig_now, dfn = plot_now()

    fig_history = None  # stays None when the history plot cannot be built
    try:
        fig_history, _ = plot_history()
    except Exception as e:
        print(e)

    return fig_now, dfn, fig_history
75
 
76
  demo = gr.Interface(
77
  fn=plot_figs,
@@ -81,7 +242,7 @@ demo = gr.Interface(
81
  outputs = [
82
  gr.Plot(label="GPU Status", elem_classes="plotcss"),
83
  gr.Dataframe(label="GPU Status Details"),
84
- gr.Plot(label="History", elem_classes="plotcss"),
85
  ],
86
  live=True,
87
  flagging_options=[],
 
3
  import pandas as pd
4
  import datetime
5
  import plotly.express as px
6
+ import plotly.graph_objects as go
7
  import datasets
8
 
9
 
10
+ ##### GPU PLOT #####
11
  def split_multi_users(dfs):
12
  df = dfs.copy()
13
  df["usernames"] = df["username"].apply(lambda x: x.split(", "))
 
23
  df = pd.DataFrame(new_df)
24
  return df
25
 
26
+ def plot_gpus():
27
  ### Load Data
28
+ dfs = datasets.load_dataset("pluslab/PLUS_Lab_GPUs_Data", data_files="gpus.csv", download_mode='force_redownload')["train"].to_pandas()
29
  dfs = dfs.drop(columns=["Unnamed: 0"])
30
  dfs = dfs.fillna("FREE")
31
  dfs_plot = split_multi_users(dfs)
 
45
  # print(dfs_plot)
46
  return fig, dfs
47
 
48
+ ##### DISK PLOT #####
49
+ def _pick_col(df, candidates):
50
+ norm = {c.strip().lower(): c for c in df.columns}
51
+ for cand in candidates:
52
+ cand = cand.strip().lower()
53
+ if cand in norm:
54
+ return norm[cand]
55
+ return None
 
 
 
 
 
 
 
 
 
 
56
 
57
+ def _kblocks_to_tib(kblocks):
58
+ # KiB blocks -> TiB (so 104149210112 -> ~97.0)
59
+ return kblocks / (1024**3)
60
 
61
def plot_disks(alert_threshold_pct=99.0):
    """Build the horizontal disk-usage chart from ``disks.csv``.

    Loads ``disks.csv`` from the ``pluslab/PLUS_Lab_GPUs_Data`` dataset
    (re-downloaded on every call), derives used/available percentages and
    sizes per row, and draws one overlaid bar per row on a 0-100 % axis.
    Used/available values are treated as 1 KiB block counts and shown in TiB
    (labelled "TB").

    Args:
        alert_threshold_pct: Rows whose used percentage is strictly greater
            than this value are flagged: the used bar takes the alert colour,
            the label gets a warning prefix and the total annotation is tinted.

    Returns:
        A ``(fig, df)`` tuple: the Plotly figure and the processed DataFrame,
        sorted by total size (largest first), with the derived columns
        ``Label``, ``Total_kb``, ``Used_pct``, ``Avail_pct``, ``Used_TB``,
        ``Avail_TB``, ``Total_TB`` and ``ALERT``. Rows without a positive,
        numeric total are dropped.

    Raises:
        ValueError: If a server, filesystem, blocks, used or available column
            cannot be located.
    """
    df = datasets.load_dataset(
        "pluslab/PLUS_Lab_GPUs_Data",
        data_files="disks.csv",
        download_mode="force_redownload",
    )["train"].to_pandas()

    if "Unnamed: 0" in df.columns:
        df = df.drop(columns=["Unnamed: 0"])

    # Column headers vary in case/spelling, so resolve them by candidate list.
    server_col = _pick_col(df, ["server"])
    fs_col = _pick_col(df, ["filesystem"])
    blocks_col = _pick_col(df, ["1k-blocks", "1k blocks", "blocks"])
    used_col = _pick_col(df, ["used"])
    avail_col = _pick_col(df, ["available", "avail"])
    mount_col = _pick_col(df, ["mounted", "mounted on", "mount", "mountpoint"])

    required = [server_col, fs_col, blocks_col, used_col, avail_col]
    if any(c is None for c in required):
        raise ValueError(f"Missing required columns. Found: {list(df.columns)}")

    for c in [blocks_col, used_col, avail_col]:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Label: prefer the mount point, fall back to the filesystem name.
    place_col = mount_col if mount_col is not None else fs_col
    df["Label"] = df[server_col].astype(str) + " • " + df[place_col].astype(str)

    # Total is computed as used + available rather than read from blocks_col.
    df["Total_kb"] = df[used_col] + df[avail_col]

    # A zero or unparseable total would give NaN percentages and
    # "nan TB (nan%)" labels, so such rows are left out of the chart.
    df = df[df["Total_kb"] > 0].copy()

    df["Used_pct"] = ((df[used_col] / df["Total_kb"]) * 100.0).clip(0, 100)
    df["Avail_pct"] = (100.0 - df["Used_pct"]).clip(0, 100)

    # Sizes in TiB (shown as "TB")
    df["Used_TB"] = _kblocks_to_tib(df[used_col])
    df["Avail_TB"] = _kblocks_to_tib(df[avail_col])
    df["Total_TB"] = _kblocks_to_tib(df["Total_kb"])

    df["ALERT"] = df["Used_pct"] > alert_threshold_pct

    # Largest volumes first.
    df = df.sort_values("Total_kb", ascending=False).reset_index(drop=True)

    # Display text
    used_text = [f"{u:.1f} TB ({p:.0f}%)" for u, p in zip(df["Used_TB"], df["Used_pct"])]
    total_text = [f"{t:.1f} TB" for t in df["Total_TB"]]
    avail_text = [f"{a:.1f} TB" for a in df["Avail_TB"]]

    # Palette + alert accent
    COLOR_TOTAL = "#CBD5E1"       # slate-300
    COLOR_USED = "#2563EB"        # blue-600
    COLOR_FREE = "#94A3B8"        # slate-400
    COLOR_ALERT = "#F59E0B"       # amber-500 (dashboard alert)
    COLOR_TOTAL_TEXT = "#334155"  # total annotation, normal rows
    COLOR_ALTXT = "#B45309"       # amber-700, total annotation on alert rows

    alerts = df["ALERT"].tolist()
    labels = df["Label"].tolist()

    # Per-row bar colour and y label (alert rows get a warning icon).
    used_colors = [COLOR_ALERT if flagged else COLOR_USED for flagged in alerts]
    y_labels = ["⚠ " + lbl if flagged else lbl for lbl, flagged in zip(labels, alerts)]

    # Numeric hover payload shared by the used and available traces:
    # [used TiB, available TiB, total TiB, used %]. Formatting and units are
    # applied in the hover templates so both traces read the same way.
    size_data = df[["Used_TB", "Avail_TB", "Total_TB", "Used_pct"]].to_numpy()

    fig = go.Figure()

    # Gray full-width background (hover shows the available size)
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=[100] * len(df),
            base=0,
            name="(hover) Available",
            orientation="h",
            marker=dict(color=COLOR_TOTAL),
            opacity=0.40,
            hovertemplate="<b>%{y}</b><br>Available: %{customdata}<br><extra></extra>",
            customdata=avail_text,
            showlegend=False,
        )
    )

    # Used (coloured per row; rows above alert_threshold_pct are highlighted)
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=df["Used_pct"],
            base=0,
            name=f"Used (>{alert_threshold_pct:.0f}% highlighted)",
            orientation="h",
            marker=dict(color=used_colors),
            text=used_text,
            textposition="inside",
            insidetextanchor="middle",
            hovertemplate=(
                "<b>%{y}</b><br>"
                "Used: %{customdata[0]:.2f} TB (%{customdata[3]:.2f}%)<br>"
                "Available: %{customdata[1]:.2f} TB<br>"
                "Total: %{customdata[2]:.2f} TB<br>"
                "<extra></extra>"
            ),
            customdata=size_data,
        )
    )

    # Available (starts where the used bar ends)
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=df["Avail_pct"],
            base=df["Used_pct"],
            name="Available",
            orientation="h",
            marker=dict(color=COLOR_FREE),
            hovertemplate=(
                "<b>%{y}</b><br>"
                "Available: %{customdata[1]:.2f} TB<br>"
                "Used: %{customdata[0]:.2f} TB<br>"
                "Total: %{customdata[2]:.2f} TB<br>"
                "<extra></extra>"
            ),
            customdata=size_data,
        )
    )

    # Total annotation just right of the 100 % mark (tinted on alert rows)
    for lbl, ttxt, is_alert in zip(y_labels, total_text, alerts):
        fig.add_annotation(
            x=100,
            y=lbl,
            text=ttxt,
            showarrow=False,
            xanchor="left",
            yanchor="middle",
            xshift=6,
            font=dict(color=(COLOR_ALTXT if is_alert else COLOR_TOTAL_TEXT)),
        )

    fig.update_layout(
        barmode="overlay",
        template="plotly_white",
        title=f"Disk usage (alerts: Used > {alert_threshold_pct:.0f}%)",
        xaxis=dict(range=[0, 100], ticksuffix="%", title="Percent of total"),
        yaxis_title="",
        height=max(420, 28 * len(df)),
        margin=dict(l=280, r=120, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    # Keep the sorted order top-to-bottom (largest volume at the top).
    fig.update_yaxes(autorange="reversed")
    return fig, df
228
+
229
+
230
+
231
+ ##### PLOT ALL #####
232
def plot_figs():
    """Produce the interface outputs: GPU plot, GPU details table, disk plot.

    The disk plot is best-effort: if ``plot_disks`` raises (for example on a
    download failure or missing columns), the error is printed and ``None`` is
    returned in its place so the GPU outputs still render.

    Returns:
        A ``(fig_gpus, dfn, fig_disks)`` tuple, where ``fig_disks`` may be
        ``None``.
    """
    fig_gpus, dfn = plot_gpus()

    fig_disks = None
    try:
        # The processed disk DataFrame is not displayed, so it is discarded.
        fig_disks, _ = plot_disks()
    except Exception as e:  # UI boundary: report and degrade instead of failing
        print(f"Disk plot failed: {e!r}")

    return fig_gpus, dfn, fig_disks
 
 
 
 
 
236
 
237
  demo = gr.Interface(
238
  fn=plot_figs,
 
242
  outputs = [
243
  gr.Plot(label="GPU Status", elem_classes="plotcss"),
244
  gr.Dataframe(label="GPU Status Details"),
245
+ gr.Plot(label="Disk Status", elem_classes="plotcss"),
246
  ],
247
  live=True,
248
  flagging_options=[],