Spaces:
Runtime error
Runtime error
Farhan1572
committed on
Commit
•
10dc1f6
1
Parent(s):
c121d90
Update app.py
Browse files
app.py
CHANGED
@@ -10,40 +10,147 @@ api_key = os.getenv("OPENAI_API_KEY")
|
|
10 |
client = OpenAI(api_key = api_key)
|
11 |
|
12 |
# finetuned model instance
|
13 |
-
finetuned_model = "ft:gpt-3.5-turbo-0125:personal::
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def humanize_text(AI_text):
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
|
38 |
# Gradio interface definition
|
39 |
interface = gr.Interface(
|
40 |
-
fn=
|
41 |
inputs="textbox",
|
42 |
outputs="textbox",
|
43 |
title="AI Text Humanizer",
|
44 |
description="Enter AI-generated text and get a human-written version.",
|
45 |
)
|
46 |
|
|
|
47 |
# Launch the Gradio app
|
48 |
interface.launch(debug = True)
|
49 |
|
|
|
10 |
# OpenAI client authenticated with the key held in `api_key`
client = OpenAI(api_key=api_key)

# finetuned model instance: identifier passed as `model=` on every completion call
finetuned_model = "ft:gpt-3.5-turbo-0125:personal::9rMWxuyj"
|
14 |
|
15 |
+
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
# text processing functions
|
22 |
+
def random_capitalize(word, probability=0.1):
    """Occasionally upper-case the first letter of a purely alphabetic word.

    Args:
        word: A single token. Tokens containing any non-alphabetic character
            (digits, punctuation) and the empty string are returned unchanged.
        probability: Chance in [0, 1] that an alphabetic word is capitalized.
            Defaults to 0.1.

    Returns:
        The word with its first letter upper-cased, or the word unchanged.
    """
    if word.isalpha() and random.random() < probability:
        # str.capitalize() would also lower-case every following letter and
        # mangle acronyms and names ("NASA" -> "Nasa"), so only the first
        # character is touched here.
        return word[0].upper() + word[1:]
    return word
|
26 |
+
|
27 |
+
def random_remove_punctuation(text, probability=0.2, max_removals=3):
    """Occasionally delete a few randomly chosen punctuation characters.

    Args:
        text: The string to perturb.
        probability: Chance in [0, 1] that any removal happens at all.
            Defaults to 0.2.
        max_removals: Upper bound (non-negative) on how many punctuation
            characters are deleted when the perturbation fires. Defaults to 3.

    Returns:
        The text with up to ``max_removals`` characters from
        ``string.punctuation`` removed, or the text unchanged.
    """
    if random.random() >= probability:
        return text

    positions = [i for i, ch in enumerate(text) if ch in string.punctuation]
    if not positions:
        return text

    # Pick distinct positions, then rebuild the string once while skipping them
    doomed = set(random.sample(positions, min(max_removals, len(positions))))
    return ''.join(ch for i, ch in enumerate(text) if i not in doomed)
|
37 |
+
|
38 |
+
def random_double_period(text, probability=0.2, max_replacements=3):
    """Occasionally turn the first few periods into double periods.

    Args:
        text: The string to perturb.
        probability: Chance in [0, 1] that the replacement happens.
            Defaults to 0.2.
        max_replacements: How many '.' characters, counted from the start of
            the text, are doubled when the perturbation fires. Defaults to 3.

    Returns:
        The text with its leading periods doubled, or the text unchanged.
    """
    if random.random() < probability:
        # str.replace with a count only rewrites the first N occurrences
        return text.replace('.', '..', max_replacements)
    return text
|
42 |
+
|
43 |
+
def random_double_space(text, probability=0.2, max_gaps=3):
    """Occasionally widen a few gaps between words to two spaces.

    When the perturbation fires the text is split on whitespace and re-joined,
    so any existing runs of whitespace are normalised to single spaces before
    the chosen gaps are widened.

    Args:
        text: The string to perturb.
        probability: Chance in [0, 1] that the perturbation happens.
            Defaults to 0.2.
        max_gaps: Upper bound on how many word gaps are widened. Defaults to 3.

    Returns:
        The re-joined text with up to ``max_gaps`` double spaces, or the
        original text unchanged.
    """
    if random.random() >= probability:
        return text

    words = text.split()
    # There is one gap after every word except the last; never negative
    gap_total = max(len(words) - 1, 0)
    wanted = min(max(max_gaps, 0), gap_total)

    # Sample distinct gaps so one gap is never widened twice (which would
    # yield triple spaces and fewer widened gaps than requested).
    for idx in random.sample(range(gap_total), wanted):
        words[idx] += ' '
    return ' '.join(words)
|
51 |
+
|
52 |
+
def random_replace_comma_space(text, period_replace_percentage=0.33):
    """Move the space in front of the mark for some ", " and ". " pairs.

    For each of the two pairs, one third of its occurrences (integer division,
    but at least one when any exist) is picked at random and rewritten from
    "<mark><space>" to "<space><mark>". The text length never changes.

    Args:
        text: The string to perturb.
        period_replace_percentage: Accepted for interface compatibility.
            NOTE(review): this value is not used by the computation; the
            fraction is fixed at one third for both commas and periods.

    Returns:
        The text with the selected pairs swapped.
    """
    swaps = {}
    # Commas are sampled before periods to keep the random draw order stable
    for mark in (",", "."):
        pair = mark + " "
        positions = [i for i in range(len(text)) if text.startswith(pair, i)]
        quota = max(1, len(positions) // 3)
        for pos in random.sample(positions, min(quota, len(positions))):
            swaps[pos] = mark

    # A ", " and a ". " can never overlap and each swap keeps the length, so
    # every position can be patched independently in a single character list.
    chars = list(text)
    for pos, mark in swaps.items():
        chars[pos] = " "
        chars[pos + 1] = mark
    return ''.join(chars)
|
78 |
+
|
79 |
+
def transform_paragraph(paragraph):
    """Apply the random perturbations to one paragraph and strip '#' and '*'.

    Paragraphs of 12 words or fewer are not perturbed. Longer paragraphs are
    split on whitespace, each word is passed through ``random_capitalize``,
    the words are re-joined with single spaces, and the result is fed through
    the punctuation-removal, double-period, double-space and comma/period-swap
    steps in that order.

    Args:
        paragraph: One line of text (no newline handling is done here).

    Returns:
        The (possibly perturbed) paragraph with every '#' and '*' removed.
    """
    words = paragraph.split()
    if len(words) > 12:
        result = ' '.join(random_capitalize(word) for word in words)
        for perturb in (
            random_remove_punctuation,
            random_double_period,
            random_double_space,
            random_replace_comma_space,
        ):
            result = perturb(result)
    else:
        result = paragraph

    # Drop both marker characters in one pass
    return result.translate(str.maketrans('', '', '#*'))
|
97 |
+
|
98 |
+
def transform_text(text):
    """Run ``transform_paragraph`` over every newline-separated line of text.

    Args:
        text: The full text; it is split on '\\n' only.

    Returns:
        The transformed lines joined back together with '\\n', so the number
        of lines is preserved.
    """
    return '\n'.join(transform_paragraph(line) for line in text.split('\n'))
|
102 |
+
|
103 |
+
# function to humanize text
|
104 |
def humanize_text(AI_text):
    """Humanizes the provided AI text using the fine-tuned model.

    Sends a fixed conversation (two system messages, instruction and
    few-shot correction turns, then the text to rewrite) to the chat
    completions endpoint through the module-level ``client``, using
    ``finetuned_model`` at temperature 0.89.

    Args:
        AI_text: The text to rewrite; interpolated into the final user message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the reply carries no content.
    """
    # NOTE(review): the continuation lines of this prompt are kept flush-left,
    # exactly as they appear in the reviewed source - confirm against the
    # deployed file before changing their indentation.
    system_prompt = """
You are a text humanizer.
You humanize AI generated text.
The text must appear like humanly written.
THE INPUT AND THE OUTPUT HEADINGS MUST BE SAME. NO HEADING SHOULD BE MISSED.
NAMES LIKE NOVEL NAME SHOULD REMAIN INTACT WITHOUT ANY CHANGE.
THE INPUT AND THE OUTPUT TEXT SHOULD HAVE THE SAME FORMAT.
THE INPUT AND THE OUTPUT SHOULD HAVE SAME WORD COUNT.
THE OUTPUT SENTENCES MUST NOT BE SIMPLE. THEY SHOULD BE COMPOUND, COMPLEX, OR COMPOUND COMPLEX.
THE HEADINGS AND THE BULLETS IN THE INPUT SHOULD REMAIN IN PLACE"""

    response = client.chat.completions.create(
        model=finetuned_model,
        temperature=0.89,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "system", "content": "YOU ARE TEXT HUMANIZER BUT YOU DO NOT REDUCE THE LENGTH OF THE SENTENCES. YOUR OUTPUT SENTENCES ARE OF EXACTLY THE SAME LENGTH AS THE INPUT"},
            {"role": "user", "content": "THE LANGUAGE OF THE INPUT AND THE OUTPUT MUST BE SAME. THE SENTENCES SHOULD NOT BE SHORT LENGTH - THEY SHOULD BE SAME AS IN THE INPUT. ALSO THE PARAGRAPHS SHOULD NOT BE SHORT EITHER - PARAGRAPHS MUST HAVE THE SAME LENGTH"},
            {"role": "user", "content": "DESPITE INSTRUCTIONS, YOU OFTEN BREAK THE SENTENCES INTO SMALLER MEANINGLESS SENTENCES, WHICH IS NOT ALLOWED. THE LENGTH OF THE SENTENCES IN OUTPUT SHOULD BE SAME AS THE INPUT!!!!"},
            {"role": "assistant", "content": "It is an industry-specific term. Therefore its understanding needs context. Here this context is used to explain SCAR. The explanation will proceed as follows."},
            {"role": "user", "content": "YOU AGAIN USED VERY SHORT AND MEANINGLESS SENTENCES WHICH ARE PROHIBITED!!!!!. The correct output should be: SCAR is regulatory measure. It juxtaposes an insurance company's available capital to needed capital. This for potential losses those derived from various risks. This capital is what's left when liabilities are subtracted from assets. "},
            {"role": "assistant", "content": "Solvency simulation. A fundamental dimension in risk control for insurance corporations."},
            {"role": "user", "content": "You again constructed short sentences despite guidance. The ideal format should be: Solvency modeling it's essential for insurance companies' risk control"},
            {"role": "user", "content": "Use as many conjunctions and punctuations to make the sentence long. COMPOUND, COMPLEX, OR COMPOUND COMPLEX sentences are required"},
            {"role": "user", "content": f"Humanize the text. Keep the output format i.e. the bullets and the headings as it is and don't use the list of words that are not permissible. \nTEXT: {AI_text}"},
        ],
    )

    reply = response.choices[0].message.content
    # The SDK types `content` as optional; treat a missing reply as empty text
    # instead of failing on None.strip().
    return (reply or "").strip()
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
def main_function(AI_text):
    """Gradio callback: rewrite the text with the model, then post-process it.

    Args:
        AI_text: The text entered in the input textbox.

    Returns:
        The output of ``humanize_text`` after it has been passed through
        ``transform_text``.
    """
    humanized_text = humanize_text(AI_text)
    return transform_text(humanized_text)
|
140 |
+
|
141 |
+
|
142 |
|
143 |
|
144 |
# Gradio interface definition: one textbox in, one textbox out, with
# main_function as the callback.
interface = gr.Interface(
    fn=main_function,
    inputs="textbox",
    outputs="textbox",
    title="AI Text Humanizer",
    description="Enter AI-generated text and get a human-written version.",
)

# Launch the Gradio app
interface.launch(debug=True)
|
156 |
|