Spaces:
Build error
Build error
rough work done; now to trim back, analyze, and then publish
Browse files- app.py +261 -143
- poetry.lock +31 -1
- pyproject.toml +1 -0
app.py
CHANGED
|
@@ -1,53 +1,17 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
# Load the spaces.parquet file as a dataframe
|
| 6 |
-
df = pd.read_parquet("spaces.parquet")
|
| 7 |
"""
|
| 8 |
Todos:
|
| 9 |
-
|
| 10 |
-
plotly graph showing the growth of spaces over time
|
| 11 |
-
plotly graph showing the breakdown of spaces by sdk
|
| 12 |
-
plotly graph of colors
|
| 13 |
-
plotly graph of emojis
|
| 14 |
-
Plotly graph of hardware
|
| 15 |
-
Investigate README lengths
|
| 16 |
-
bar chart of the number of spaces per author
|
| 17 |
-
Is there a correlation between pinning a space and the number of likes?
|
| 18 |
-
Is a correlation between the emoji and the number of likes?
|
| 19 |
-
distribution of python versions
|
| 20 |
-
what models are most used
|
| 21 |
-
what organizations are most popular in terms of their models and datasets being used
|
| 22 |
-
most duplicated spaces
|
| 23 |
-
|
| 24 |
-
"id",
|
| 25 |
-
"author",
|
| 26 |
-
"created_at",
|
| 27 |
-
"last_modified",
|
| 28 |
-
"subdomain",
|
| 29 |
-
"host",
|
| 30 |
-
"likes",
|
| 31 |
-
"sdk",
|
| 32 |
-
"tags",
|
| 33 |
-
"readme_size",
|
| 34 |
-
"python_version",
|
| 35 |
-
"license",
|
| 36 |
-
"duplicated_from",
|
| 37 |
-
"models",
|
| 38 |
-
"datasets",
|
| 39 |
-
"emoji",
|
| 40 |
-
"colorFrom",
|
| 41 |
-
"colorTo",
|
| 42 |
-
"pinned",
|
| 43 |
-
"stage",
|
| 44 |
-
"hardware",
|
| 45 |
-
"devMode",
|
| 46 |
-
"custom_domains",
|
| 47 |
"""
|
| 48 |
|
| 49 |
|
| 50 |
-
def filtered_df(emoji, likes, author, hardware, tags, models, datasets):
|
| 51 |
_df = df
|
| 52 |
# if emoji is not none, filter the dataframe with it
|
| 53 |
if emoji:
|
|
@@ -80,118 +44,272 @@ def filtered_df(emoji, likes, author, hardware, tags, models, datasets):
|
|
| 80 |
)
|
| 81 |
)
|
| 82 |
]
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
| 94 |
)
|
| 95 |
|
| 96 |
-
|
| 97 |
-
df["url"] = np.where(
|
| 98 |
-
df["custom_domains"].isnull(),
|
| 99 |
-
df["id"],
|
| 100 |
-
df["custom_domains"],
|
| 101 |
-
)
|
| 102 |
-
emoji = gr.Dropdown(
|
| 103 |
-
df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
|
| 104 |
-
) # Dropdown to select the emoji
|
| 105 |
-
likes = gr.Slider(
|
| 106 |
-
minimum=df["likes"].min(),
|
| 107 |
-
maximum=df["likes"].max(),
|
| 108 |
-
step=1,
|
| 109 |
-
label="Filter by Likes",
|
| 110 |
-
) # Slider to filter by likes
|
| 111 |
-
hardware = gr.Dropdown(
|
| 112 |
-
df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
|
| 113 |
-
)
|
| 114 |
-
author = gr.Dropdown(
|
| 115 |
-
df["author"].unique().tolist(), label="Search by Author", multiselect=True
|
| 116 |
-
)
|
| 117 |
-
# get the list of unique strings in the sdk_tags column
|
| 118 |
-
sdk_tags = np.unique(np.concatenate(df["sdk_tags"].values))
|
| 119 |
-
# create a dropdown for the sdk_tags
|
| 120 |
-
sdk_tags = gr.Dropdown(
|
| 121 |
-
sdk_tags.tolist(), label="Filter by SDK/Tags", multiselect=True
|
| 122 |
-
)
|
| 123 |
-
# create a gradio checkbox group for hardware
|
| 124 |
-
hardware = gr.CheckboxGroup(
|
| 125 |
-
df["hardware"].unique().tolist(), label="Filter by Hardware"
|
| 126 |
-
)
|
| 127 |
|
| 128 |
-
space_license = gr.CheckboxGroup(
|
| 129 |
-
df["license"].unique().tolist(), label="Filter by license"
|
| 130 |
-
)
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
|
| 135 |
-
)
|
| 136 |
-
# Now, flatten all arrays into one list
|
| 137 |
-
flattened_strings = np.concatenate(array_column_as_lists.values)
|
| 138 |
-
# Get unique strings
|
| 139 |
-
unique_strings = np.unique(flattened_strings)
|
| 140 |
-
# Convert to a list if needed
|
| 141 |
-
unique_strings_list = unique_strings.tolist()
|
| 142 |
-
models = gr.Dropdown(
|
| 143 |
-
unique_strings_list,
|
| 144 |
-
label="Search by Model",
|
| 145 |
-
multiselect=True,
|
| 146 |
-
)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
]
|
| 181 |
-
]
|
| 182 |
-
)
|
| 183 |
-
df["url"] = df["url"].apply(
|
| 184 |
-
lambda x: (
|
| 185 |
-
f"<a target='_blank' href=https://huggingface.co/spaces/{x}>{x}</a>"
|
| 186 |
-
if x is not None and "/" in x
|
| 187 |
-
else f"<a target='_blank' href=https://{x[0]}>{x[0]}</a>"
|
| 188 |
)
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
|
| 197 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
# Load the spaces.parquet file as a dataframe and do some pre cleaning steps
|
| 6 |
+
|
| 7 |
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
Todos:
|
| 10 |
+
Clean up existing filtering code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
|
| 14 |
+
def filtered_df(emoji, likes, author, hardware, tags, models, datasets, space_licenses):
|
| 15 |
_df = df
|
| 16 |
# if emoji is not none, filter the dataframe with it
|
| 17 |
if emoji:
|
|
|
|
| 44 |
)
|
| 45 |
)
|
| 46 |
]
|
| 47 |
+
if space_licenses:
|
| 48 |
+
_df = _df[
|
| 49 |
+
_df["licenses"].apply(
|
| 50 |
+
lambda x: (
|
| 51 |
+
any(space_license in x for space_license in space_licenses)
|
| 52 |
+
if x is not None
|
| 53 |
+
else False
|
| 54 |
+
)
|
| 55 |
+
)
|
| 56 |
+
]
|
| 57 |
|
| 58 |
+
# rename the columns to make them more readable
|
| 59 |
+
_df = _df.rename(
|
| 60 |
+
columns={
|
| 61 |
+
'url': 'URL',
|
| 62 |
+
'likes': 'Likes',
|
| 63 |
+
"r_models": "Models",
|
| 64 |
+
"r_datasets": "Datasets",
|
| 65 |
+
"r_licenses": "Licenses",
|
| 66 |
+
}
|
| 67 |
)
|
| 68 |
|
| 69 |
+
return _df[["URL", "Likes", "Models", "Datasets", "Licenses" ]]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
+
with gr.Blocks(fill_width=True) as demo:
|
| 73 |
+
with gr.Tab(label="Spaces Overview"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
# The Pandas dataframe has a datetime column. Plot the growth of spaces (row entries) over time.
|
| 76 |
+
# The x-axis should be the date and the y-axis should be the cumulative number of spaces created up to that date.
|
| 77 |
+
df = pd.read_parquet("spaces.parquet")
|
| 78 |
+
df = df.sort_values("created_at")
|
| 79 |
+
df['cumulative_spaces'] = df['created_at'].rank(method='first').astype(int)
|
| 80 |
+
fig1 = px.line(df, x='created_at', y='cumulative_spaces', title='Growth of Spaces Over Time', labels={'created_at': 'Date', 'cumulative_spaces': 'Number of Spaces'}, template='plotly_dark')
|
| 81 |
+
gr.Plot(fig1)
|
| 82 |
|
| 83 |
+
# Create a pie chart showing the distribution of spaces by SDK
|
| 84 |
+
fig2 = px.pie(df, names='sdk', title='Distribution of Spaces by SDK', template='plotly_dark')
|
| 85 |
+
gr.Plot(fig2)
|
| 86 |
+
|
| 87 |
+
# create a pie chart showing the distribution of spaces by emoji for the top 10 used emojis
|
| 88 |
+
emoji_counts = df['emoji'].value_counts().head(10).reset_index()
|
| 89 |
+
fig3 = px.pie(emoji_counts, names='emoji', values='count', title='Distribution of Spaces by Emoji', template='plotly_dark')
|
| 90 |
+
gr.Plot(fig3)
|
| 91 |
+
|
| 92 |
+
# Create a dataframe with the top 20 authors and the number of spaces they have created
|
| 93 |
+
author_counts = df['author'].value_counts().head(20).reset_index()
|
| 94 |
+
author_counts.columns = ['Author', 'Number of Spaces']
|
| 95 |
+
gr.DataFrame(author_counts)
|
| 96 |
+
|
| 97 |
+
# Create a scatter plot showing the relationship between the number of likes and the number of spaces created by an author
|
| 98 |
+
author_likes = df.groupby('author').agg({'likes': 'sum', 'id': 'count'}).reset_index()
|
| 99 |
+
fig4 = px.scatter(author_likes, x='id', y='likes', title='Relationship between Number of Spaces Created and Number of Likes', labels={'id': 'Number of Spaces Created', 'likes': 'Number of Likes'}, hover_data={'author': True}, template='plotly_dark')
|
| 100 |
+
gr.Plot(fig4)
|
| 101 |
|
| 102 |
+
# Create a scatter plot showing, for the 20 emojis with the most total likes, the relationship between the number of spaces using an emoji and the total likes of those spaces
|
| 103 |
+
emoji_likes = df.groupby('emoji').agg({'likes': 'sum', 'id': 'count'}).sort_values(by='likes', ascending=False).head(20).reset_index()
|
| 104 |
+
fig10 = px.scatter(emoji_likes, x='id', y='likes', title='Relationship between Number of Spaces Created and Number of Likes', labels={'id': 'Number of Spaces Created', 'likes': 'Number of Likes'}, hover_data={'emoji': True}, template='plotly_dark')
|
| 105 |
+
gr.Plot(fig10)
|
| 106 |
+
|
| 107 |
+
# Create a bar chart of hardware in use
|
| 108 |
+
hardware = df['hardware'].value_counts().reset_index()
|
| 109 |
+
hardware.columns = ['Hardware', 'Number of Spaces']
|
| 110 |
+
fig5 = px.bar(hardware, x='Hardware', y='Number of Spaces', title='Hardware in Use', labels={'Hardware': 'Hardware', 'Number of Spaces': 'Number of Spaces (log scale)'}, color='Hardware', template='plotly_dark')
|
| 111 |
+
fig5.update_layout(yaxis_type='log')
|
| 112 |
+
gr.Plot(fig5)
|
| 113 |
+
|
| 114 |
+
models = np.concatenate([arr for arr in df['models'].values if arr is not None])
|
| 115 |
+
model_count = {}
|
| 116 |
+
model_author_count = {}
|
| 117 |
+
for model in models:
|
| 118 |
+
author = model.split('/')[0]
|
| 119 |
+
if model in model_count:
|
| 120 |
+
model_count[model] += 1
|
| 121 |
+
else:
|
| 122 |
+
model_count[model] = 1
|
| 123 |
+
if author in model_author_count:
|
| 124 |
+
model_author_count[author] += 1
|
| 125 |
+
else:
|
| 126 |
+
model_author_count[author] = 1
|
| 127 |
+
model_author_count = pd.DataFrame(model_author_count.items(), columns=['Model Author', 'Number of Spaces'])
|
| 128 |
+
fig8 = px.bar(model_author_count.sort_values('Number of Spaces', ascending=False).head(20), x='Model Author', y='Number of Spaces', title='Most Popular Model Authors', labels={'Model': 'Model', 'Number of Spaces': 'Number of Spaces'}, template='plotly_dark')
|
| 129 |
+
gr.Plot(fig8)
|
| 130 |
+
model_count = pd.DataFrame(model_count.items(), columns=['Model', 'Number of Spaces'])
|
| 131 |
+
# then make a bar chart
|
| 132 |
+
fig6 = px.bar(model_count.sort_values('Number of Spaces', ascending=False).head(20), x='Model', y='Number of Spaces', title='Most Used Models', labels={'Model': 'Model', 'Number of Spaces': 'Number of Spaces'}, template='plotly_dark')
|
| 133 |
+
gr.Plot(fig6)
|
| 134 |
+
|
| 135 |
+
datasets = np.concatenate([arr for arr in df['datasets'].values if arr is not None])
|
| 136 |
+
dataset_count = {}
|
| 137 |
+
dataset_author_count = {}
|
| 138 |
+
for dataset in datasets:
|
| 139 |
+
author = dataset.split('/')[0]
|
| 140 |
+
if dataset in dataset_count:
|
| 141 |
+
dataset_count[dataset] += 1
|
| 142 |
+
else:
|
| 143 |
+
dataset_count[dataset] = 1
|
| 144 |
+
if author in dataset_author_count:
|
| 145 |
+
dataset_author_count[author] += 1
|
| 146 |
+
else:
|
| 147 |
+
dataset_author_count[author] = 1
|
| 148 |
+
dataset_count = pd.DataFrame(dataset_count.items(), columns=['Datasets', 'Number of Spaces'])
|
| 149 |
+
dataset_author_count = pd.DataFrame(dataset_author_count.items(), columns=['Dataset Author', 'Number of Spaces'])
|
| 150 |
+
fig9 = px.bar(dataset_author_count.sort_values('Number of Spaces', ascending=False).head(20), x='Dataset Author', y='Number of Spaces', title='Most Popular Dataset Authors', labels={'Dataset Author': 'Dataset Author', 'Number of Spaces': 'Number of Spaces'}, template='plotly_dark')
|
| 151 |
+
gr.Plot(fig9)
|
| 152 |
+
# then make a bar chart
|
| 153 |
+
fig7 = px.bar(dataset_count.sort_values('Number of Spaces', ascending=False).head(20), x='Datasets', y='Number of Spaces', title='Most Used Datasets', labels={'Datasets': 'Datasets', 'Number of Spaces': 'Number of Spaces'}, template='plotly_dark')
|
| 154 |
+
gr.Plot(fig7)
|
| 155 |
+
|
| 156 |
+
# Get the most duplicated spaces
|
| 157 |
+
duplicated_spaces = df['duplicated_from'].value_counts().head(20).reset_index()
|
| 158 |
+
duplicated_spaces.columns = ['Space', 'Number of Duplicates']
|
| 159 |
+
gr.DataFrame(duplicated_spaces)
|
| 160 |
+
|
| 161 |
+
# Get the most liked spaces
|
| 162 |
+
liked_spaces = df[['id', 'likes']].sort_values(by='likes', ascending=False).head(20)
|
| 163 |
+
liked_spaces.columns = ['Space', 'Number of Likes']
|
| 164 |
+
gr.DataFrame(liked_spaces)
|
| 165 |
+
|
| 166 |
+
# Get the spaces with the longest READMEs
|
| 167 |
+
readme_sizes = df[['id', 'readme_size']].sort_values(by='readme_size', ascending=False).head(20)
|
| 168 |
+
readme_sizes.columns = ['Space', 'Longest READMEs']
|
| 169 |
+
gr.DataFrame(readme_sizes)
|
| 170 |
+
|
| 171 |
+
with gr.Tab(label="Spaces Search"):
|
| 172 |
+
df = pd.read_parquet("spaces.parquet")
|
| 173 |
+
df = df[df["stage"] == "RUNNING"]
|
| 174 |
+
# combine the sdk and tags columns, one of which is a string and the other is an array of strings
|
| 175 |
+
# first convert the sdk column to an array of strings
|
| 176 |
+
df["sdk"] = df["sdk"].apply(lambda x: np.array([str(x)]))
|
| 177 |
+
df["licenses"] = df["license"].apply(
|
| 178 |
+
lambda x: np.array([str(x)]) if x is None else x
|
| 179 |
+
)
|
| 180 |
+
# then combine the sdk and tags columns so that their elements are together
|
| 181 |
+
df["sdk_tags"] = df[["sdk", "tags"]].apply(
|
| 182 |
+
lambda x: np.concatenate((x.iloc[0], x.iloc[1])), axis=1
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
df['emoji'] = np.where(df['emoji'].isnull(), '', df['emoji'])
|
| 186 |
+
|
| 187 |
+
# where the custom_domains column is not null, use that as the url; otherwise, use the id column
|
| 188 |
+
df["url"] = np.where(
|
| 189 |
+
df["custom_domains"].isnull(),
|
| 190 |
+
df["id"],
|
| 191 |
+
df["custom_domains"],
|
| 192 |
+
)
|
| 193 |
+
df["url"] = df[["url", "emoji"]].apply(
|
| 194 |
+
lambda x: (
|
| 195 |
+
f"<a target='_blank' href=https://huggingface.co/spaces/{x.iloc[0]}>{str(x.iloc[1]) + " " + x.iloc[0]}</a>"
|
| 196 |
+
if x.iloc[0] is not None and "/" in x.iloc[0]
|
| 197 |
+
else f"<a target='_blank' href=https://{x.iloc[0][0]}>{str(x.iloc[1]) + " " + x.iloc[0][0]}</a>"
|
| 198 |
+
),
|
| 199 |
+
axis=1,
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# Make all of this human readable
|
| 203 |
+
df["r_models"] = [', '.join(models) if models is not None else '' for models in df["models"]]
|
| 204 |
+
df["r_sdk_tags"] = [', '.join(sdk_tags) if sdk_tags is not None else '' for sdk_tags in df["sdk_tags"]]
|
| 205 |
+
df["r_datasets"] = [', '.join(datasets) if datasets is not None else '' for datasets in df["datasets"]]
|
| 206 |
+
df["r_licenses"] = [', '.join(licenses) if licenses is not None else '' for licenses in df["licenses"]]
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
emoji = gr.Dropdown(
|
| 210 |
+
df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
|
| 211 |
+
) # Dropdown to select the emoji
|
| 212 |
+
likes = gr.Slider(
|
| 213 |
+
minimum=df["likes"].min(),
|
| 214 |
+
maximum=df["likes"].max(),
|
| 215 |
+
step=1,
|
| 216 |
+
label="Filter by Likes",
|
| 217 |
+
) # Slider to filter by likes
|
| 218 |
+
hardware = gr.Dropdown(
|
| 219 |
+
df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
|
| 220 |
+
)
|
| 221 |
+
author = gr.Dropdown(
|
| 222 |
+
df["author"].unique().tolist(), label="Search by Author", multiselect=True
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# get the list of unique strings in the sdk_tags column
|
| 227 |
+
sdk_tags = np.unique(np.concatenate(df["sdk_tags"].values))
|
| 228 |
+
# create a dropdown for the sdk_tags
|
| 229 |
+
sdk_tags = gr.Dropdown(
|
| 230 |
+
sdk_tags.tolist(), label="Filter by SDK/Tags", multiselect=True
|
| 231 |
+
)
|
| 232 |
+
# create a gradio checkbox group for hardware
|
| 233 |
+
hardware = gr.CheckboxGroup(
|
| 234 |
+
df["hardware"].unique().tolist(), label="Filter by Hardware"
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
licenses = np.unique(np.concatenate(df["licenses"].values))
|
| 238 |
+
space_license = gr.CheckboxGroup(licenses.tolist(), label="Filter by license")
|
| 239 |
+
|
| 240 |
+
# If the models column is none make it an array of "none" so that things don't break
|
| 241 |
+
models_column_to_list = df["models"].apply(
|
| 242 |
+
lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
|
| 243 |
+
)
|
| 244 |
+
# Now, flatten all arrays into one list
|
| 245 |
+
models_flattened = np.concatenate(models_column_to_list.values)
|
| 246 |
+
# Get unique strings
|
| 247 |
+
unique_models = np.unique(models_flattened)
|
| 248 |
+
models = gr.Dropdown(
|
| 249 |
+
unique_models.tolist(),
|
| 250 |
+
label="Search by Model",
|
| 251 |
+
multiselect=True,
|
| 252 |
+
)
|
| 253 |
+
|
| 254 |
+
# Do the same for datasets that we did for models
|
| 255 |
+
datasets_column_to_list = df["datasets"].apply(
|
| 256 |
+
lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
|
| 257 |
+
)
|
| 258 |
+
flattened_datasets = np.concatenate(datasets_column_to_list.values)
|
| 259 |
+
unique_datasets = np.unique(flattened_datasets)
|
| 260 |
+
datasets = gr.Dropdown(
|
| 261 |
+
unique_datasets.tolist(),
|
| 262 |
+
label="Search by Dataset",
|
| 263 |
+
multiselect=True,
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
+
devMode = gr.Checkbox(value=False, label="DevMode Enabled")
|
| 267 |
+
clear = gr.ClearButton(components=[
|
| 268 |
+
emoji,
|
| 269 |
+
author,
|
| 270 |
+
hardware,
|
| 271 |
+
sdk_tags,
|
| 272 |
+
models,
|
| 273 |
+
datasets,
|
| 274 |
+
space_license
|
| 275 |
+
])
|
| 276 |
+
|
| 277 |
+
df = pd.DataFrame(
|
| 278 |
+
df[
|
| 279 |
+
[
|
| 280 |
+
"id",
|
| 281 |
+
"emoji",
|
| 282 |
+
"author",
|
| 283 |
+
"url",
|
| 284 |
+
"likes",
|
| 285 |
+
"hardware",
|
| 286 |
+
"sdk_tags",
|
| 287 |
+
"models",
|
| 288 |
+
"datasets",
|
| 289 |
+
"licenses",
|
| 290 |
+
"r_sdk_tags",
|
| 291 |
+
"r_models",
|
| 292 |
+
"r_datasets",
|
| 293 |
+
"r_licenses",
|
| 294 |
+
]
|
| 295 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
)
|
| 297 |
+
gr.DataFrame(
|
| 298 |
+
filtered_df,
|
| 299 |
+
inputs=[
|
| 300 |
+
emoji,
|
| 301 |
+
likes,
|
| 302 |
+
author,
|
| 303 |
+
hardware,
|
| 304 |
+
sdk_tags,
|
| 305 |
+
models,
|
| 306 |
+
datasets,
|
| 307 |
+
space_license,
|
| 308 |
+
],
|
| 309 |
+
datatype="html",
|
| 310 |
+
wrap=True,
|
| 311 |
+
column_widths=["25%", "5%", "25%", "25%", "20%"]
|
| 312 |
+
)
|
| 313 |
|
| 314 |
|
| 315 |
demo.launch()
|
poetry.lock
CHANGED
|
@@ -1648,6 +1648,21 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa
|
|
| 1648 |
typing = ["typing-extensions"]
|
| 1649 |
xmp = ["defusedxml"]
|
| 1650 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1651 |
[[package]]
|
| 1652 |
name = "pyarrow"
|
| 1653 |
version = "17.0.0"
|
|
@@ -2093,6 +2108,21 @@ anyio = ">=3.4.0,<5"
|
|
| 2093 |
[package.extras]
|
| 2094 |
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"]
|
| 2095 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2096 |
[[package]]
|
| 2097 |
name = "tomlkit"
|
| 2098 |
version = "0.12.0"
|
|
@@ -2519,4 +2549,4 @@ multidict = ">=4.0"
|
|
| 2519 |
[metadata]
|
| 2520 |
lock-version = "2.0"
|
| 2521 |
python-versions = "^3.12"
|
| 2522 |
-
content-hash = "
|
|
|
|
| 1648 |
typing = ["typing-extensions"]
|
| 1649 |
xmp = ["defusedxml"]
|
| 1650 |
|
| 1651 |
+
[[package]]
|
| 1652 |
+
name = "plotly"
|
| 1653 |
+
version = "5.24.0"
|
| 1654 |
+
description = "An open-source, interactive data visualization library for Python"
|
| 1655 |
+
optional = false
|
| 1656 |
+
python-versions = ">=3.8"
|
| 1657 |
+
files = [
|
| 1658 |
+
{file = "plotly-5.24.0-py3-none-any.whl", hash = "sha256:0e54efe52c8cef899f7daa41be9ed97dfb6be622613a2a8f56a86a0634b2b67e"},
|
| 1659 |
+
{file = "plotly-5.24.0.tar.gz", hash = "sha256:eae9f4f54448682442c92c1e97148e3ad0c52f0cf86306e1b76daba24add554a"},
|
| 1660 |
+
]
|
| 1661 |
+
|
| 1662 |
+
[package.dependencies]
|
| 1663 |
+
packaging = "*"
|
| 1664 |
+
tenacity = ">=6.2.0"
|
| 1665 |
+
|
| 1666 |
[[package]]
|
| 1667 |
name = "pyarrow"
|
| 1668 |
version = "17.0.0"
|
|
|
|
| 2108 |
[package.extras]
|
| 2109 |
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"]
|
| 2110 |
|
| 2111 |
+
[[package]]
|
| 2112 |
+
name = "tenacity"
|
| 2113 |
+
version = "9.0.0"
|
| 2114 |
+
description = "Retry code until it succeeds"
|
| 2115 |
+
optional = false
|
| 2116 |
+
python-versions = ">=3.8"
|
| 2117 |
+
files = [
|
| 2118 |
+
{file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"},
|
| 2119 |
+
{file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"},
|
| 2120 |
+
]
|
| 2121 |
+
|
| 2122 |
+
[package.extras]
|
| 2123 |
+
doc = ["reno", "sphinx"]
|
| 2124 |
+
test = ["pytest", "tornado (>=4.5)", "typeguard"]
|
| 2125 |
+
|
| 2126 |
[[package]]
|
| 2127 |
name = "tomlkit"
|
| 2128 |
version = "0.12.0"
|
|
|
|
| 2549 |
[metadata]
|
| 2550 |
lock-version = "2.0"
|
| 2551 |
python-versions = "^3.12"
|
| 2552 |
+
content-hash = "462f1993751686e196fc4b665537755237673c202245650318bcdcfbd89485ea"
|
pyproject.toml
CHANGED
|
@@ -11,6 +11,7 @@ python = "^3.12"
|
|
| 11 |
gradio = "^4.42.0"
|
| 12 |
datasets = "^2.21.0"
|
| 13 |
pandas = "^2.2.2"
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
[build-system]
|
|
|
|
| 11 |
gradio = "^4.42.0"
|
| 12 |
datasets = "^2.21.0"
|
| 13 |
pandas = "^2.2.2"
|
| 14 |
+
plotly = "^5.24.0"
|
| 15 |
|
| 16 |
|
| 17 |
[build-system]
|