""" | |
nohup python3 app.py & | |
export GOOGLE_APPLICATION_CREDENTIALS="gcp_creds.json" | |
""" | |
import gc
import re
import uuid
import json
from typing import Dict
from collections import defaultdict
from datetime import date, datetime

import nltk
import torch
import numpy as np
import gradio as gr
import language_tool_python
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from google.cloud import storage

if gr.NO_RELOAD:
    from humanize import humanize_text, device

    # humanize_text = None
    # device = None

from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
from google_search import google_search, months, domain_list, build_date
from ai_generate import generate, citations_to_html, remove_citations, display_cited_text, llm_wrapper
from youtube import transcribe

# nltk.download("punkt_tab")

print(f"Using device: {device}")
print("Loading AI detection models...")
models = {
    "Polygraf AI (Base Model)": AutoModelForSequenceClassification.from_pretrained(
        "polygraf-ai/bc-roberta-openai-2sent"
    ).to(device),
    "Polygraf AI (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
        "polygraf-ai/bc_combined_3sent"
    ).to(device),
}
tokenizers = {
    "Polygraf AI (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
    "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
}

# Grammar correction tool
tool = language_tool_python.LanguageTool("en-US")

# Source detection model
MC_TOKEN_SIZE = 256
TEXT_MC_MODEL_PATH = "polygraf-ai/mc-model"
MC_LABEL_MAP = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "Grammar Enhancer"]
text_mc_tokenizer = AutoTokenizer.from_pretrained(TEXT_MC_MODEL_PATH)
print("Loading Source detection model...")
text_mc_model = AutoModelForSequenceClassification.from_pretrained(TEXT_MC_MODEL_PATH).to(device)


def generate_cited_html(cited_text, citations: dict):
    cited_text = cited_text.replace("\n", "<br>")
    html_code = """
    <style>
        .reference-container {
            position: relative;
            display: inline-block;
        }
        .reference-btn {
            display: inline-block;
            width: 20px; /* Reduced width */
            height: 20px; /* Reduced height */
            border-radius: 50%;
            background-color: #e33a89; /* Pink color for the button */
            color: white;
            text-align: center;
            line-height: 20px; /* Adjusted line-height */
            cursor: pointer;
            font-weight: bold;
            margin-right: 5px;
            transition: background-color 0.3s ease, transform 0.3s ease;
        }
        .reference-btn:hover {
            background-color: #ff69b4; /* Lighter pink on hover */
            transform: scale(1.1); /* Slightly enlarge on hover */
        }
        .reference-popup {
            display: none;
            position: absolute;
            z-index: 1;
            top: 100%;
            background-color: #f9f9f9;
            border: 1px solid #ddd;
            padding: 15px;
            border-radius: 4px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.2);
            width: calc(min(90vw, 400px));
            max-height: calc(min(80vh, 300px));
            overflow-y: auto;
        }
        .reference-popup .close-btn {
            float: right;
            cursor: pointer;
            font-weight: bold;
            color: white;
            font-size: 16px;
            padding: 0;
            width: 20px;
            height: 20px;
            text-align: center;
            line-height: 20px;
            background-color: #ff4c4c;
            border-radius: 2px;
            transition: transform 0.3s ease, background-color 0.3s ease;
        }
        .reference-popup .close-btn:hover {
            transform: scale(1.2);
            background-color: #ff3333;
        }
        input[type="radio"] {
            position: absolute;
            opacity: 0;
            pointer-events: none;
        }
        input[type="radio"]:checked + .reference-popup {
            display: block;
        }
        /* Additional styling for distinct sections */
        .reference-popup strong {
            font-weight: bold;
            color: #333;
            display: block;
            margin-bottom: 5px;
        }
        .reference-popup p {
            margin: 0 0 10px 0;
            padding: 0;
        }
        .reference-popup .source {
            margin-bottom: 10px;
            font-size: 14px;
            font-weight: bold;
            color: #1e90ff;
        }
        .reference-popup .content {
            margin-bottom: 10px;
            font-size: 13px;
            color: #555;
        }
        @media (prefers-color-scheme: dark) {
            .reference-btn {
                background-color: #1e90ff;
            }
            .reference-popup {
                background-color: #2c2c2c;
                border-color: #444;
                color: #f1f1f1;
            }
            .reference-popup .close-btn {
                background-color: #ff4c4c;
            }
            .reference-popup .close-btn:hover {
                background-color: #ff3333;
            }
            .reference-popup strong {
                color: #ddd;
            }
            .reference-popup .source {
                color: #1e90ff;
            }
            .reference-popup .content {
                color: #bbb;
            }
        }
    </style>
    <script>
        document.addEventListener('click', (event) => {
            const containers = document.querySelectorAll('.reference-container');
            containers.forEach(container => {
                const rect = container.getBoundingClientRect();
                const popup = container.querySelector('.reference-popup');
                // Reset alignment
                popup.style.left = '';
                popup.style.right = '';
                const popupWidth = popup.offsetWidth;
                const viewportWidth = window.innerWidth;
                // If the popup would go off the right edge
                if (rect.right + popupWidth > viewportWidth) {
                    popup.style.right = '0'; // Align popup to the right
                }
                // If the popup would go off the left edge
                else if (rect.left - popupWidth < 0) {
                    popup.style.left = '0'; // Align popup to the left
                }
                // Otherwise center it
                else {
                    popup.style.left = '50%';
                    popup.style.transform = 'translateX(-50%)'; // Center the popup
                }
            });
        });

        function closeReferencePanes() {
            document.querySelectorAll('input[name="reference"]').forEach((input) => {
                input.checked = false;
            });
        }
    </script>
    <div style="height: 600px; overflow-y: auto; overflow-x: auto;">
    """

    # Replace each inline citation with a numbered reference button
    citation_numbers = {}
    next_number = 1
    citation_count = 0  # To track unique instances of each citation
    references = "<b>References:</b><br><br>"

    def replace_citations(match):
        nonlocal citation_count, next_number, references
        citation_id = match.group(1)  # Extract the citation number from the match
        ref_data = citations.get(int(citation_id))
        # If reference data is not found, return the original text
        if not ref_data:
            return match.group(0)
        # If the source is a Gradio temp path (an uploaded PDF), keep only the file name
        if "/var/tmp/gradio/" in ref_data["source"]:
            ref_data["source"] = ref_data["source"].split("/")[-1]
        # Remove newline artifacts from scraping / parsing
        ref_data["content"] = ref_data["content"].replace("\n", " ")
        # If the source is a URL, make it clickable
        if ref_data["source"].startswith("http"):
            source_html = f'<a href="{ref_data["source"]}" target="_blank" class="source">{ref_data["source"]}</a>'
        else:
            source_html = f'<span class="source">{ref_data["source"]}</span>'
        if citation_id not in citation_numbers:
            citation_numbers[citation_id] = next_number
            source = ref_data["source"]
            content = ref_data["content"]
            references += f"[{next_number}] {source}<br>- {content}<br><br>"
            next_number += 1
        citation_number = citation_numbers[citation_id]
        # Unique id for each reference button and popup
        unique_id = f"{citation_id}-{citation_count}"
        citation_count += 1
        # HTML code for the reference button and popup with formatted content
        button_html = f"""
        <span class="reference-container">
            <label for="ref-toggle-{unique_id}" class="reference-btn" onclick="closeReferencePanes(); document.getElementById('ref-toggle-{unique_id}').checked = true;">{citation_number}</label>
            <input type="radio" id="ref-toggle-{unique_id}" name="reference" />
            <span class="reference-popup">
                <span class="close-btn" onclick="document.getElementById('ref-toggle-{unique_id}').checked = false;">×</span>
                <strong>Source:</strong> {source_html}
                <strong>Content:</strong> <p class="content">{ref_data["content"]}</p>
            </span>
        </span>
        """
        return button_html

    # Replace inline citations in the text with the generated HTML
    html_code += re.sub(r"<(\d+)>", replace_citations, cited_text)
    html_code += "<br><br>" + references
    html_code += "</div>"
    return html_code
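
# Illustrative usage sketch (hypothetical values): the cited text carries markers
# like "<1>", and the citations dict maps those integer ids to the
# {"source": ..., "content": ...} entries produced by ai_generate.generate().
#   example_citations = {1: {"source": "https://example.com/post", "content": "Quoted passage..."}}
#   html = generate_cited_html("AI detection keeps improving <1>.", example_citations)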


# Function to move model to the appropriate device
def to_device(model):
    return model.to(device)


def copy_to_input(text):
    return text


def remove_bracketed_numbers(text):
    pattern = r"^\[\d+\]"
    cleaned_text = re.sub(pattern, "", text)
    return cleaned_text
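
# Example: remove_bracketed_numbers("[3] Smith (2021)") returns " Smith (2021)"
# (only a leading "[n]" marker is removed; any leftover whitespace is kept).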


def clean_text(text: str) -> str:
    paragraphs = text.split("\n\n")
    cleaned_paragraphs = []
    for paragraph in paragraphs:
        cleaned = re.sub(r"\s+", " ", paragraph).strip()
        cleaned = re.sub(r"(?<=\.) ([a-z])", lambda x: x.group(1).upper(), cleaned)
        cleaned_paragraphs.append(cleaned)
    cleaned_paragraphs = [item for item in cleaned_paragraphs if item.strip()]
    return "\n\n".join(cleaned_paragraphs)


def format_references(text: str) -> str:
    body, references = split_text_from_refs(text)
    return body + references


def split_text_from_refs(text: str, sep="\n"):
    lines = text.split("\n")
    references = []
    article_text = []
    index_pattern = re.compile(r"\[(\d+)\]")
    in_references = False

    for line in lines:
        if line == "":
            continue
        match = re.search(r"[Rr]eferences:", line, re.DOTALL)
        if line.strip().lower() == "references" or line.strip().lower() == "references:":
            in_references = True
            continue
        if line.strip().lower().startswith("references:"):
            in_references = True
        if match:
            in_references = True
            line = line[match.end() :]
        if in_references:
            matches = index_pattern.split(line)
            for match in matches:
                if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
                    references.append(match.strip())
        else:
            article_text.append(line.strip())

    if len(references) > 0:
        formatted_refs = []
        for i, ref in enumerate(references, 1):
            ref = remove_bracketed_numbers(ref)
            formatted_refs.append(f"[{i}] {ref}{sep}")
        formatted_refs = f"{sep}{sep}References:{sep}{sep}" + f"{sep}".join(formatted_refs)
    else:
        formatted_refs = ""

    body = f"{sep}{sep}".join(article_text)
    return body, formatted_refs
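
# Sketch of the expected behavior (hypothetical input; references are renumbered):
#   body, refs = split_text_from_refs("Some text.\nReferences:\n[1] Source A\n[2] Source B")
#   body -> "Some text."
#   refs -> "\n\nReferences:\n\n[1] Source A\n\n[2] Source B\n"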


def ends_with_references(text):
    # Define a regular expression pattern for variations of "References:"
    pattern = re.compile(r"\b[Rr]eferences:\s*$", re.IGNORECASE | re.MULTILINE)
    # Check if the text ends with any form of "References:"
    return bool(pattern.search(text.strip()))


def format_and_correct_language_check(text: str) -> str:
    return tool.correct(text)


def predict(model, tokenizer, text):
    text = remove_special_characters(text)
    bc_token_size = 256
    with torch.no_grad():
        model.eval()
        tokens = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=bc_token_size,
            return_tensors="pt",
        ).to(device)
        output = model(**tokens)
        output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
        output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
        torch.cuda.empty_cache()
        gc.collect()
    return output_norm
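
# Illustrative call (assumes the models loaded above; the scores are made up):
#   predict(models["Polygraf AI (Base Model)"],
#           tokenizers["Polygraf AI (Base Model)"],
#           "Some passage to score.")
#   -> {"HUMAN": 0.91, "AI": 0.09}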


def ai_generated_test(text, model="BC Original"):
    return predict(models[model], tokenizers[model], text)


def detection_polygraf(text, model="BC Original"):
    # sentences = split_into_sentences(text)
    sentences = nltk.sent_tokenize(text)
    num_sentences = len(sentences)
    scores = defaultdict(list)
    overall_scores = []

    # Process each chunk of 3 sentences and store the score for each sentence in the chunk
    for i in range(num_sentences):
        chunk = " ".join(sentences[i : i + 3])
        if chunk:
            # result = classifier(chunk)
            result = ai_generated_test(chunk, model)
            score = result["AI"]
            for j in range(i, min(i + 3, num_sentences)):
                scores[j].append(score)

    # Calculate the average score for each sentence and apply color coding
    paragraphs = text.split("\n")
    paragraphs = [s for s in paragraphs if s.strip()]
    colored_paragraphs = []
    i = 0
    for paragraph in paragraphs:
        temp_sentences = nltk.sent_tokenize(paragraph)
        colored_sentences = []
        for sentence in temp_sentences:
            if scores[i]:
                avg_score = sum(scores[i]) / len(scores[i])
                if avg_score >= 0.70:
                    colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
                elif avg_score >= 0.55:
                    colored_sentence = f"<span style='background-color:GoldenRod;'>{sentence}</span>"
                else:
                    colored_sentence = sentence
                colored_sentences.append(colored_sentence)
                overall_scores.append(avg_score)
            i = i + 1
        combined_sentences = " ".join(colored_sentences)
        colored_paragraphs.append(combined_sentences)

    overall_score = sum(overall_scores) / len(overall_scores)
    overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
    return overall_score, "<br><br>".join(colored_paragraphs)
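
# Returns an overall {"HUMAN": ..., "AI": ...} score plus an HTML string in which
# sentences averaging >= 0.70 AI probability are highlighted red and >= 0.55 gold.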


ai_check_options = [
    "Polygraf AI (Base Model)",
    "Polygraf AI (Advanced Model)",
]


def predict_mc(text):
    with torch.no_grad():
        text_mc_model.eval()
        tokens = text_mc_tokenizer(
            text,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
            max_length=MC_TOKEN_SIZE,
        ).to(device)
        output = text_mc_model(**tokens)
        output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
        torch.cuda.empty_cache()
        gc.collect()
    return output_norm
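
# The returned probabilities line up positionally with MC_LABEL_MAP (this pairing is
# how predict_mc_scores() reads them), so a made-up output of
# [0.7, 0.1, 0.1, 0.05, 0.05] would mean 0.7 for "OpenAI GPT", 0.1 for "Mistral", etc.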


def predict_mc_scores(input, bc_score):
    mc_scores = []
    segments_mc = split_text_allow_complete_sentences_nltk(input, type_det="mc", tokenizer=text_mc_tokenizer)
    samples_len_mc = len(segments_mc)
    for i in range(samples_len_mc):
        cleaned_text_mc = remove_special_characters(segments_mc[i])
        mc_score = predict_mc(cleaned_text_mc)
        mc_scores.append(mc_score)
    mc_scores_array = np.array(mc_scores)
    average_mc_scores = np.mean(mc_scores_array, axis=0)
    mc_score_list = average_mc_scores.tolist()
    mc_score = {}
    for score, label in zip(mc_score_list, MC_LABEL_MAP):
        mc_score[label.upper()] = score
    # Scale the per-source probabilities by the overall AI probability
    sum_prob = 1 - bc_score["HUMAN"]
    for key, value in mc_score.items():
        mc_score[key] = value * sum_prob
    print("MC Score:", mc_score)
    if sum_prob < 0.01:
        mc_score = {}
    return mc_score
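
# Example of the scaling above (hypothetical numbers): with bc_score["HUMAN"] = 0.2,
# each raw class probability is multiplied by 0.8, so the source scores sum to the
# overall AI probability; below 1% AI probability the breakdown is suppressed.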


def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
    text = remove_citations(text)
    body, references = split_text_from_refs(text)
    score, text = detection_polygraf(text=body, model=model)
    mc_score = predict_mc_scores(body, score)  # mc score
    text = text + references.replace("\n", "<br>")
    return score, text, mc_score


def ai_check(history: list, option: str):
    text = history[-1][1]
    # All detector options currently route to the Polygraf highlighter
    return highlighter_polygraf(text, option)


def generate_prompt(settings: Dict[str, str]) -> str:
    settings["keywords"] = [item for item in settings["keywords"] if item.strip()]
    # - Add a "References" section in the format "References:" on a new line after the requested text, formatted as [1], [2], etc. with each source on their own line
    prompt = f"""
    Write an approximately {settings['article_length']}-word {settings['format']} on {settings['topic']}.\n
    """
    if settings["context"]:
        prompt += f"""
    Context:
    - {settings['context']}
    """
    prompt += f"""
    Style and Tone:
    - Writing style: {settings['writing_style']}
    - Tone: {settings['tone']}
    - Target audience: {settings['user_category']}

    Content:
    - Depth: {settings['depth_of_content']}
    - Structure: {', '.join(settings['structure'])}
    """
    if len(settings["keywords"]) > 0:
        prompt += f"""
    Keywords to incorporate:
    {', '.join(settings['keywords'])}
    """
    prompt += f"""
    Additional requirements:
    - Don't start with "Here is a..."; start with the requested text directly
    - End with a {settings['conclusion_type']} conclusion
    - Do not make any headline or title bold.
    - Ensure proper paragraph breaks for better readability.
    - Avoid any references to artificial intelligence or language models, do not reveal that the text is AI-generated, and do not add meta-phrases such as "here is the article".
    - Adhere to any format structure provided to the system, if any.
    """
    return prompt
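
# Minimal sketch of the settings dict this expects (values are illustrative):
#   generate_prompt({
#       "article_length": 300, "format": "Essay", "topic": "solar power",
#       "context": "", "writing_style": "Formal", "tone": "Professional",
#       "user_category": "Students", "depth_of_content": "Moderate analysis",
#       "structure": ["Introduction", "Body", "Conclusion"],
#       "keywords": ["photovoltaics"], "conclusion_type": "Summary",
#   })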


def regenerate_prompt(settings: Dict[str, str]) -> str:
    prompt = f"""
    I am a {settings['role']}.

    "{settings['generated_article']}"

    Edit the given text based on the user comments.

    User Comments:
    - {settings['user_comments']}

    Requirements:
    - Don't start with "Here is a..."; start with the requested text directly
    - The original content should not be changed; make minor modifications based on the user comments above.
    - Keep the references the same as in the given text, in the same format.
    - Do not make any headline or title bold.

    Context:
    - {settings['context']}

    Ensure proper paragraph breaks for better readability.
    Avoid any references to artificial intelligence or language models, do not reveal that the text is AI-generated, and do not add meta-phrases such as "here is the article".
    """
    return prompt


def generate_article(
    input_role: str,
    topic: str,
    context: str,
    keywords: str,
    article_length: str,
    format: str,
    writing_style: str,
    tone: str,
    user_category: str,
    depth_of_content: str,
    structure: str,
    references: str,
    num_examples: str,
    conclusion_type: str,
    ai_model: str,
    url_content: str = None,
    api_key: str = None,
    pdf_file_input: list[str] = None,
    generated_article: str = None,
    user_comments: str = None,
    yt_content: str = None,
) -> str:
    settings = {
        "role": input_role,
        "topic": topic,
        "context": context,
        "keywords": [k.strip() for k in keywords.split(",")],
        "article_length": article_length,
        "format": format,
        "writing_style": writing_style,
        "tone": tone,
        "user_category": user_category,
        "depth_of_content": depth_of_content,
        "structure": [s.strip() for s in structure.split(",")],
        "references": [r.strip() for r in references.split(",")],
        "num_examples": num_examples,
        "conclusion_type": conclusion_type,
        "generated_article": generated_article,
        "user_comments": user_comments,
    }
    if generated_article:
        prompt = regenerate_prompt(settings)
    else:
        prompt = generate_prompt(settings)
    print("Generated Prompt...\n", prompt)
    article, citations = generate(
        prompt=prompt,
        input_role=input_role,
        topic=topic,
        context=context,
        model=ai_model,
        url_content=url_content,
        path=pdf_file_input,
        # path=["./final_report.pdf"],  # TODO: reset
        temperature=1,
        max_length=2048,
        api_key=api_key,
        sys_message="",
        yt_content=yt_content,
    )
    return article, citations


def get_history(history):
    # return history
    history_formatted = []
    for entry in history:
        history_formatted.append((entry[0], entry[1]))
    return history_formatted


def clear_history():
    # Return an empty list for both the history state and its display
    return [], []


def humanize(
    model: str,
    cited_text: str,
    temperature: float = 1.2,
    repetition_penalty: float = 1,
    top_k: int = 50,
    length_penalty: float = 1,
    history=None,
) -> str:
    print("Humanizing text...")
    # body, references = split_text_from_refs(text)
    cited_text = history[-1][1]
    citations = history[-1][2]
    article = humanize_text(
        text=cited_text,
        model_name=model,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    # result = result + references
    # corrected_text = format_and_correct_language_check(result)
    article = clean_text(article)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Humanized Text | {timestamp}\nInput: {model}", article, citations))
    latest_humanizer_data = {
        "original text": cited_text,
        "humanized text": article,
        "citations": citations,  # can remove saving citations
        "metadata": {
            "temperature": temperature,
            "repetition_penalty": repetition_penalty,
            "top_k": top_k,
            "length_penalty": length_penalty,
        },
        "timestamp": timestamp,
    }
    return generate_cited_html(article, citations), history, latest_humanizer_data
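
# History entries are (label, article_text, citations) triples; humanize() reads the
# latest one, rewrites its text, and appends a new "Humanized Text | <timestamp>" entry.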


def update_visibility_api(model: str):
    if model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]:
        return gr.update(visible=True)
    else:
        return gr.update(visible=False)


# Update the default selected structure based on the selected format
def update_structure(format_choice):
    # Formats that should use "Plain Text"
    plain_text_formats = [
        "TikTok Video Content",
        "Instagram Video Content",
        "LinkedIn post",
        "X (Twitter) post",
        "Facebook post",
        "Email",
    ]
    # Set the appropriate default structure based on the selected format
    if format_choice in plain_text_formats:
        return gr.update(value="Plain Text", interactive=True)
    else:
        return gr.update(value="Introduction, Body, Conclusion", interactive=True)


# Initialize the Google Cloud Storage client
client = storage.Client()
bucket_name = "ai-source-detection"
bucket = client.bucket(bucket_name)


def save_to_cloud_storage(
    article,
    topic,
    input_role,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    ai_model,
    url_content,
    generated_article,
    user_comments,
    timestamp,
):
    """Save the generated article and metadata to Google Cloud Storage within a specific folder."""
    # Create a unique filename
    file_id = str(uuid.uuid4())
    # Define the file path and name in the bucket
    folder_path = "ai-writer/"
    file_name = f"{folder_path}{timestamp.replace(' ', '_').replace(':', '-')}_{file_id}.json"
    # Create a dictionary with the article and all relevant metadata
    data = {
        "article": article,
        "metadata": {
            "topic": topic,
            "input_role": input_role,
            "context": context,
            "keywords": keywords,
            "article_length": article_length,
            "format": format,
            "writing_style": writing_style,
            "tone": tone,
            "user_category": user_category,
            "depth_of_content": depth_of_content,
            "structure": structure,
            "references": references,
            "num_examples": num_examples,
            "conclusion_type": conclusion_type,
            "ai_model": ai_model,
            "url_content": url_content,
            "generated_article": generated_article,
            "user_comments": user_comments,
            "timestamp": timestamp,
        },
    }
    # Convert data to a JSON string
    json_data = json.dumps(data)
    # Create a blob and upload to GCS
    blob = bucket.blob(file_name)
    blob.upload_from_string(json_data, content_type="application/json")
    return f"Data saved as {file_name} in GCS."


def save_humanizer_feedback_to_cloud_storage(data, humanizer_feedback):
    """Save humanizer feedback and metadata to Google Cloud Storage within a specific folder."""
    if data:
        try:
            data["user_feedback"] = humanizer_feedback
            # Create a unique filename
            file_id = str(uuid.uuid4())
            # Define the file path and name in the bucket
            folder_path = "ai-writer/humanizer-feedback/"
            file_name = f"{folder_path}{data['timestamp'].replace(' ', '_').replace(':', '-')}_{file_id}.json"
            # Convert data to a JSON string
            json_data = json.dumps(data)
            # Create a blob and upload to GCS
            blob = bucket.blob(file_name)
            blob.upload_from_string(json_data, content_type="application/json")
            gr.Info("Successfully reported. Thank you for the feedback!")
        except Exception:
            gr.Warning("Report not saved.")
    else:
        gr.Warning("Nothing humanized to save yet!")


scholar_urls = [
    "arxiv.org",
    "aclanthology.org",
    "ieeexplore.ieee.org",
    "researchgate.net",
    # "scholar.google.com",
    "springer.com",
    # "sciencedirect.com",  # 400
    # "onlinelibrary.wiley.com",  # 400
    "jstor.org",  # 400
    "semanticscholar.org",
    "biorxiv.org",
    "medrxiv.org",
    "ssrn.com",
    "pubmed.ncbi.nlm.nih.gov",
    "cochranelibrary.com",
]


def generate_and_format(
    input_role,
    topic,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    google_search_check,
    scholar_mode_check,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_include,
    include_sites,
    exclude_sites,
    pdf_file_input,
    history=None,
    yt_url: str = None,
    # generated_article and user_comments come right after yt_url so the
    # regenerate button can wire them in positionally (see regenerate_btn.click)
    generated_article: str = None,
    user_comments: str = None,
    ai_model="OpenAI GPT 4o",
    api_key=None,
):
    url_content = None
    if google_search_check:
        gr.Info("Searching internet for relevant content...")
        date_from = build_date(year_from, month_from, day_from)
        date_to = build_date(year_to, month_to, day_to)
        sorted_date = f"date:r:{date_from}:{date_to}"
        final_query = llm_wrapper(
            input_role, topic, context, model="OpenAI GPT 4o", task_type="internet", temperature=0.7
        )
        if scholar_mode_check:
            # scholar_site_queries = [f"site:{site.strip()}" for site in scholar_urls]
            # final_query += " " + " OR ".join(scholar_site_queries)
            pass
        else:
            if include_sites:
                site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
                final_query += " " + " OR ".join(site_queries)
            if exclude_sites:
                exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
                final_query += " " + " ".join(exclude_queries)
        print(f"Google Search Query: {final_query}")
        url_content = google_search(final_query, sorted_date, domains_to_include, scholar_mode_check)
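        # Illustrative final query (hypothetical): a topic query refined by llm_wrapper, e.g.
        # 'solar panel efficiency site:nrel.gov OR site:energy.gov -site:pinterest.com',
        # date-restricted via the "date:r:<from>:<to>" sort string built above.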

    yt_content = {}
    if yt_url:
        gr.Info("Transcribing YouTube video...")
        transcribed_text = transcribe(yt_url)
        gr.Info("Transcription completed. Generating article...")
        yt_content[yt_url] = transcribed_text

    # topic_context = topic + ", " + context
    article, citations = generate_article(
        input_role,
        topic,
        context,
        keywords,
        article_length,
        format,
        writing_style,
        tone,
        user_category,
        depth_of_content,
        structure,
        references,
        num_examples,
        conclusion_type,
        ai_model,
        url_content,
        api_key,
        pdf_file_input,
        generated_article,
        user_comments,
        yt_content,
    )
    # if ends_with_references(article) and url_content is not None:
    #     for url in url_content.keys():
    #         article += f"\n{url}"
    article = clean_text(display_cited_text(article))
    # reference_formatted = format_references(article)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Generated Text | {timestamp}\nInput: {topic}", article, citations))

    # Save the article and metadata to Cloud Storage.
    # We don't save if there is PDF input, for privacy reasons.
    if pdf_file_input is None:
        save_message = save_to_cloud_storage(
            article,
            topic,
            input_role,
            context,
            keywords,
            article_length,
            format,
            writing_style,
            tone,
            user_category,
            depth_of_content,
            structure,
            references,
            num_examples,
            conclusion_type,
            ai_model,
            url_content,
            generated_article,
            user_comments,
            timestamp,
        )
        print(save_message)
    return generate_cited_html(article, citations), history


# def create_interface():
with gr.Blocks(
    theme=gr.themes.Default(
        primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
    ),
    css="""
    .input-highlight-pink block_label {background-color: #008080}
    """,
) as demo:
    history = gr.State([])
    latest_humanizer_data = gr.State()
    today = date.today()
    # dd/Month/YYYY, e.g. "31/January/2024"
    d1 = today.strftime("%d/%B/%Y")
    d1 = d1.split("/")
gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6") | |
with gr.Row(): | |
with gr.Column(scale=1): | |
with gr.Group(): | |
gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4") | |
input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student") | |
input_topic = gr.Textbox( | |
label="Topic", | |
placeholder="Enter the main topic of your article", | |
elem_classes="input-highlight-pink", | |
) | |
input_context = gr.Textbox( | |
label="Context", | |
placeholder="Provide some context for your topic", | |
elem_classes="input-highlight-pink", | |
) | |
input_keywords = gr.Textbox( | |
label="Keywords", | |
placeholder="Enter comma-separated keywords", | |
elem_classes="input-highlight-yellow", | |
) | |
with gr.Row(): | |
input_format = gr.Dropdown( | |
choices=[ | |
"Article", | |
"Essay", | |
"Blog post", | |
"Report", | |
"Research paper", | |
"News article", | |
"White paper", | |
"Email", | |
"LinkedIn post", | |
"X (Twitter) post", | |
"Instagram Video Content", | |
"TikTok Video Content", | |
"Facebook post", | |
], | |
value="Article", | |
label="Format", | |
elem_classes="input-highlight-turquoise", | |
) | |
input_length = gr.Slider( | |
minimum=50, | |
maximum=5000, | |
step=50, | |
value=300, | |
label="Article Length", | |
elem_classes="input-highlight-pink", | |
) | |
with gr.Row(): | |
input_writing_style = gr.Dropdown( | |
choices=[ | |
"Formal", | |
"Informal", | |
"Technical", | |
"Conversational", | |
"Journalistic", | |
"Academic", | |
"Creative", | |
], | |
value="Formal", | |
label="Writing Style", | |
elem_classes="input-highlight-yellow", | |
) | |
input_tone = gr.Dropdown( | |
choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"], | |
value="Professional", | |
label="Tone", | |
elem_classes="input-highlight-turquoise", | |
) | |
input_user_category = gr.Dropdown( | |
choices=[ | |
"Students", | |
"Professionals", | |
"Researchers", | |
"General Public", | |
"Policymakers", | |
"Entrepreneurs", | |
], | |
value="General Public", | |
label="Target Audience", | |
elem_classes="input-highlight-pink", | |
) | |
input_depth = gr.Dropdown( | |
choices=[ | |
"Surface-level overview", | |
"Moderate analysis", | |
"In-depth research", | |
"Comprehensive study", | |
], | |
value="Moderate analysis", | |
label="Depth of Content", | |
elem_classes="input-highlight-yellow", | |
) | |
input_structure = gr.Dropdown( | |
choices=[ | |
"Introduction, Body, Conclusion", | |
"Abstract, Introduction, Methods, Results, Discussion, Conclusion", | |
"Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion", | |
"Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion", | |
"Plain Text", | |
], | |
value="Introduction, Body, Conclusion", | |
label="Structure", | |
elem_classes="input-highlight-turquoise", | |
interactive=True, | |
) | |
input_references = gr.Dropdown( | |
choices=[ | |
"Academic journals", | |
"Industry reports", | |
"Government publications", | |
"News outlets", | |
"Expert interviews", | |
"Case studies", | |
], | |
value="News outlets", | |
label="References", | |
elem_classes="input-highlight-pink", | |
) | |
input_num_examples = gr.Dropdown( | |
choices=["1-2", "3-4", "5+"], | |
value="1-2", | |
label="Number of Examples/Case Studies", | |
elem_classes="input-highlight-yellow", | |
) | |
input_conclusion = gr.Dropdown( | |
choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"], | |
value="Call to Action", | |
label="Conclusion Type", | |
elem_classes="input-highlight-turquoise", | |
) | |
gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6") | |
google_default = False | |
with gr.Row(): | |
google_search_check = gr.Checkbox( | |
label="Enable Internet Search For Recent Sources", value=google_default | |
) | |
with gr.Group(visible=google_default) as search_options: | |
with gr.Row(): | |
scholar_mode_check = gr.Checkbox(label="Enable Scholar Mode", value=False) | |
with gr.Group(visible=True) as site_options: | |
with gr.Row(): | |
include_sites = gr.Textbox( | |
label="Include Specific Websites", | |
placeholder="Enter comma-separated keywords", | |
elem_classes="input-highlight-yellow", | |
) | |
with gr.Row(): | |
exclude_sites = gr.Textbox( | |
label="Exclude Specific Websites", | |
placeholder="Enter comma-separated keywords", | |
elem_classes="input-highlight-yellow", | |
) | |
with gr.Row(): | |
domains_to_include = gr.Dropdown( | |
domain_list, | |
value=domain_list, | |
multiselect=True, | |
label="Domains To Include", | |
) | |
with gr.Row(): | |
month_from = gr.Dropdown( | |
choices=months, | |
label="From Month", | |
value="January", | |
interactive=True, | |
) | |
day_from = gr.Textbox(label="From Day", value="01") | |
year_from = gr.Textbox(label="From Year", value="2000") | |
with gr.Row(): | |
month_to = gr.Dropdown( | |
choices=months, | |
label="To Month", | |
value=d1[1], | |
interactive=True, | |
) | |
day_to = gr.Textbox(label="To Day", value=d1[0]) | |
year_to = gr.Textbox(label="To Year", value=d1[2]) | |
gr.Markdown("# Add Optional PDF Files with Information", elem_classes="text-center text-3xl mb-6") | |
pdf_file_input = gr.File(label="Upload PDF(s)", file_count="multiple", file_types=[".pdf"]) | |
gr.Markdown("# Add Youtube Video Link", elem_classes="text-center text-3xl mb-6") | |
yt_url = gr.Textbox( | |
label="Youtube Video Link", | |
placeholder="Enter the link of the video", | |
elem_classes="input-highlight-pink", | |
) | |
""" | |
# NOTE: HIDE AI MODEL SELECTION | |
with gr.Group(): | |
gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4") | |
ai_generator = gr.Dropdown( | |
choices=[ | |
"OpenAI GPT 4", | |
"OpenAI GPT 4o", | |
"OpenAI GPT 4o Mini", | |
"Claude Sonnet 3.5", | |
"Gemini 1.5 Pro", | |
"LLaMA 3", | |
], | |
value="OpenAI GPT 4o Mini", | |
label="AI Model", | |
elem_classes="input-highlight-pink", | |
) | |
input_api = gr.Textbox(label="API Key", visible=False) | |
ai_generator.change(update_visibility_api, ai_generator, input_api) | |
""" | |
generate_btn = gr.Button("Generate Article", variant="primary") | |

        with gr.Column(scale=2):
            with gr.Tab("Text Generator"):
                output_article = gr.HTML(
                    value="""<div style="height: 600px;"></div>""",
                    label="Generated Article",
                )
                with gr.Accordion("Regenerate Article", open=False):
                    ai_comments = gr.Textbox(
                        label="Add comments to help edit generated text", interactive=True, visible=True
                    )
                    regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=True)

                ai_detector_dropdown = gr.Dropdown(
                    choices=ai_check_options, label="Select AI Detector", value="Polygraf AI (Base Model)"
                )
                ai_check_btn = gr.Button("AI Check")
                with gr.Accordion("AI Detection Results", open=True):
                    ai_check_result = gr.Label(label="AI Check Result")
                    mc_check_result = gr.Label(label="Creator Check Result")
                    highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)

                with gr.Accordion("Advanced Humanizer Settings", open=False):
                    with gr.Row():
                        model_dropdown = gr.Radio(
                            choices=["Advanced Model (Beta)"],
                            value="Advanced Model (Beta)",
                            label="Humanizer Model Version",
                        )
                    with gr.Row():
                        temperature_slider = gr.Slider(
                            minimum=0.1, maximum=2.0, step=0.1, value=1.0, label="Temperature"
                        )
                        top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
                    with gr.Row():
                        repetition_penalty_slider = gr.Slider(
                            minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
                        )
                        length_penalty_slider = gr.Slider(
                            minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
                        )
                humanize_btn = gr.Button("Humanize")

                with gr.Row(equal_height=False):
                    with gr.Column():
                        humanizer_feedback = gr.Textbox(label="Add optional feedback on humanizer")
                    with gr.Column():
                        report_humanized_btn = gr.Button("Report Humanized Text", variant="primary", visible=True)

                # humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
                # copy_to_input_btn = gr.Button("Copy to Input for AI Check")

            with gr.Tab("History"):
                history_chat = gr.Chatbot(label="Generation History", height=1000)
                clear_history_btn = gr.Button("Clear History")
                clear_history_btn.click(clear_history, outputs=[history, history_chat])
""" | |
# NOTE: REMOVED REFRESH BUTTON | |
refresh_button = gr.Button("Refresh History") | |
refresh_button.click(get_history, outputs=history_chat) | |
""" | |

    def regenerate_visible(text):
        if text:
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)

    def highlight_visible(text):
        if text.startswith("Polygraf"):
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)

    def search_visible(toggle):
        if toggle:
            return gr.update(visible=True)
        else:
            return gr.update(visible=False)

    google_search_check.change(
        lambda toggle: gr.update(visible=toggle), inputs=google_search_check, outputs=search_options
    )
    # ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
    # output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
    # ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
    ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

    # Update the default structure based on the selected format,
    # e.g. "Plain Text" for certain formats
    input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)

    report_humanized_btn.click(
        save_humanizer_feedback_to_cloud_storage, inputs=[latest_humanizer_data, humanizer_feedback]
    )

    generate_btn.click(
        fn=generate_and_format,
        inputs=[
            input_role,
            input_topic,
            input_context,
            input_keywords,
            input_length,
            input_format,
            input_writing_style,
            input_tone,
            input_user_category,
            input_depth,
            input_structure,
            input_references,
            input_num_examples,
            input_conclusion,
            # ai_generator,
            # input_api,
            google_search_check,
            scholar_mode_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_include,
            include_sites,
            exclude_sites,
            pdf_file_input,
            history,
            yt_url,
        ],
        outputs=[output_article, history],
    )
    regenerate_btn.click(
        fn=generate_and_format,
        inputs=[
            input_role,
            input_topic,
            input_context,
            input_keywords,
            input_length,
            input_format,
            input_writing_style,
            input_tone,
            input_user_category,
            input_depth,
            input_structure,
            input_references,
            input_num_examples,
            input_conclusion,
            # ai_generator,
            # input_api,
            google_search_check,
            scholar_mode_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_include,
            include_sites,
            exclude_sites,
            pdf_file_input,
            history,
            yt_url,
            output_article,  # passed through as generated_article
            ai_comments,  # passed through as user_comments
        ],
        outputs=[output_article, history],
    )
    ai_check_btn.click(
        fn=ai_check,
        inputs=[history, ai_detector_dropdown],
        outputs=[ai_check_result, highlighted_text, mc_check_result],
    )
    humanize_btn.click(
        fn=humanize,
        inputs=[
            model_dropdown,
            output_article,
            temperature_slider,
            repetition_penalty_slider,
            top_k_slider,
            length_penalty_slider,
            history,
        ],
        outputs=[output_article, history, latest_humanizer_data],
    )
    generate_btn.click(get_history, inputs=[history], outputs=[history_chat])
    regenerate_btn.click(get_history, inputs=[history], outputs=[history_chat])
    humanize_btn.click(get_history, inputs=[history], outputs=[history_chat])

    # return demo


if __name__ == "__main__":
    # demo = create_interface()
    demo.queue(
        max_size=2,
        default_concurrency_limit=2,
    ).launch(server_name="0.0.0.0", share=True, server_port=7890)
    # demo.launch(server_name="0.0.0.0")