Spaces:
Running
Running
FoodDesert
committed on
Commit
•
72cd75e
1
Parent(s):
22f7149
Upload 2 files
Browse files- app.py +28 -7
- word_rating_probabilities.csv +0 -0
app.py
CHANGED
@@ -22,10 +22,10 @@ faq_content="""
|
|
22 |
|
23 |
## What is the purpose of this tool?
|
24 |
|
25 |
-
When you enter a txt2img prompt
|
26 |
If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
|
27 |
Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
|
28 |
-
in case you want to look them up to get more ideas.
|
29 |
|
30 |
## Does input order matter?
|
31 |
|
@@ -59,7 +59,8 @@ So for example, the query "red fox, red fox, red fox, score:7" will yield a list
|
|
59 |
than the query "red fox, score:7".
|
60 |
|
61 |
## Why is this space tagged "not-for-all-audience"?
|
62 |
-
The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion.
|
|
|
63 |
|
64 |
## How is the artist list calculated?
|
65 |
|
@@ -88,6 +89,8 @@ A similarity weight slider value of 0 means that only the FastText model's predi
|
|
88 |
"""
|
89 |
|
90 |
|
|
|
|
|
91 |
grammar=r"""
|
92 |
!start: (prompt | /[][():]/+)*
|
93 |
prompt: (emphasized | plain | comma | WHITESPACE)*
|
@@ -154,6 +157,19 @@ with h5py.File('conditional_tag_probabilities_matrix.h5', 'r') as f:
|
|
154 |
conditional_smoothing = 100. / conditional_doc_count
|
155 |
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
def clean_tag(tag):
|
158 |
return ''.join(char for char in tag if ord(char) < 128)
|
159 |
|
@@ -219,7 +235,7 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
|
|
219 |
return geometric_mean
|
220 |
|
221 |
|
222 |
-
def find_similar_tags(test_tags, similarity_weight):
|
223 |
|
224 |
#Initialize stuff
|
225 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
@@ -261,6 +277,10 @@ def find_similar_tags(test_tags, similarity_weight):
|
|
261 |
result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
|
262 |
seen.add(similar_tag)
|
263 |
|
|
|
|
|
|
|
|
|
264 |
#Adjust score based on context
|
265 |
for i in range(len(result)):
|
266 |
word, score = result[i] # Unpack the tuple
|
@@ -284,7 +304,7 @@ def find_similar_tags(test_tags, similarity_weight):
|
|
284 |
|
285 |
return results_data # Return list of lists for Dataframe
|
286 |
|
287 |
-
def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
288 |
try:
|
289 |
new_tags_string = new_tags_string.lower()
|
290 |
new_tags_string, removed_tags = remove_special_tags(new_tags_string)
|
@@ -296,7 +316,7 @@ def find_similar_artists(new_tags_string, top_n, similarity_weight):
|
|
296 |
new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
|
297 |
|
298 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
299 |
-
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight)
|
300 |
|
301 |
X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
|
302 |
similarities = cosine_similarity(X_new_image, X_artist)[0]
|
@@ -317,7 +337,8 @@ iface = gr.Interface(
|
|
317 |
inputs=[
|
318 |
gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
|
319 |
gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
|
320 |
-
gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight")
|
|
|
321 |
],
|
322 |
outputs=[
|
323 |
gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),
|
|
|
22 |
|
23 |
## What is the purpose of this tool?
|
24 |
|
25 |
+
When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
|
26 |
If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
|
27 |
Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided,
|
28 |
+
in case you want to look them up to get more ideas. This is useful to align your prompt with the expected input to an e621-trained model.
|
29 |
|
30 |
## Does input order matter?
|
31 |
|
|
|
59 |
than the query "red fox, score:7".
|
60 |
|
61 |
## Why is this space tagged "not-for-all-audience"?
|
62 |
+
The "not-for-all-audience" tag informs users that this tool's text output is derived from e621.net data for tag prediction and completion.
|
63 |
+
The app tries not to display NSFW tags unless the "Allow NSFW Tags" checkbox is checked, but the filter is not perfect.
|
64 |
|
65 |
## How is the artist list calculated?
|
66 |
|
|
|
89 |
"""
|
90 |
|
91 |
|
92 |
+
nsfw_threshold = 0.95 # Tags whose probability sum in word_rating_probabilities.csv is >= this value are treated as NSFW
|
93 |
+
|
94 |
grammar=r"""
|
95 |
!start: (prompt | /[][():]/+)*
|
96 |
prompt: (emphasized | plain | comma | WHITESPACE)*
|
|
|
157 |
conditional_smoothing = 100. / conditional_doc_count
|
158 |
|
159 |
|
160 |
+
nsfw_tags = set() # Initialize an empty set to store words meeting the threshold
|
161 |
+
# Open and read the CSV file
|
162 |
+
with open("word_rating_probabilities.csv", 'r', newline='', encoding='utf-8') as csvfile:
|
163 |
+
reader = csv.reader(csvfile)
|
164 |
+
next(reader, None) # Skip the header row
|
165 |
+
for row in reader:
|
166 |
+
word = row[0] # The word is in the first column
|
167 |
+
probability_sum = float(row[1]) # The sum of probabilities is in the second column, convert to float for comparison
|
168 |
+
# Check if the probability sum meets the threshold and add the word to the set if it does
|
169 |
+
if probability_sum >= nsfw_threshold:
|
170 |
+
nsfw_tags.add(word)
|
171 |
+
|
172 |
+
|
173 |
def clean_tag(tag):
|
174 |
return ''.join(char for char in tag if ord(char) < 128)
|
175 |
|
|
|
235 |
return geometric_mean
|
236 |
|
237 |
|
238 |
+
def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
239 |
|
240 |
#Initialize stuff
|
241 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
|
|
277 |
result.append((similar_tag.replace('_', ' '), round(similarity, 3)))
|
278 |
seen.add(similar_tag)
|
279 |
|
280 |
+
#Remove NSFW tags if appropriate.
|
281 |
+
if not allow_nsfw_tags:
|
282 |
+
result = [(word, score) for word, score in result if word.replace(' ','_') not in nsfw_tags]
|
283 |
+
|
284 |
#Adjust score based on context
|
285 |
for i in range(len(result)):
|
286 |
word, score = result[i] # Unpack the tuple
|
|
|
304 |
|
305 |
return results_data # Return list of lists for Dataframe
|
306 |
|
307 |
+
def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
|
308 |
try:
|
309 |
new_tags_string = new_tags_string.lower()
|
310 |
new_tags_string, removed_tags = remove_special_tags(new_tags_string)
|
|
|
316 |
new_image_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip() for tag in new_image_tags]
|
317 |
|
318 |
###unseen_tags = list(set(OrderedDict.fromkeys(new_image_tags)) - set(vectorizer.vocabulary_.keys())) #We may want this line again later. These are the tags that were not used to calculate the artists list.
|
319 |
+
unseen_tags_data = find_similar_tags(new_image_tags, similarity_weight, allow_nsfw_tags)
|
320 |
|
321 |
X_new_image = vectorizer.transform([','.join(new_image_tags + removed_tags)])
|
322 |
similarities = cosine_similarity(X_new_image, X_artist)[0]
|
|
|
337 |
inputs=[
|
338 |
gr.Textbox(label="Enter image tags", placeholder="e.g. fox, outside, detailed background, ..."),
|
339 |
gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of artists"),
|
340 |
+
gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Similarity weight"),
|
341 |
+
gr.Checkbox(label="Allow NSFW Tags", value=False)
|
342 |
],
|
343 |
outputs=[
|
344 |
gr.Dataframe(label="Unseen Tags", headers=["Tag", "Similar Tags", "Similarity"]),
|
word_rating_probabilities.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|