"""
nohup python3 app.py &
export GOOGLE_APPLICATION_CREDENTIALS="gcp_creds.json"
"""
import gc
import re
import uuid
import json
from typing import Dict
from collections import defaultdict
from datetime import date, datetime
import nltk
import torch
import numpy as np
import gradio as gr
import language_tool_python
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from google.cloud import storage
if gr.NO_RELOAD:
from humanize import humanize_text, device
# humanize_text = None
# device = None
from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
from google_search import google_search, months, domain_list, build_date
from ai_generate import generate, citations_to_html, remove_citations, display_cited_text, llm_wrapper
from youtube import transcribe
# nltk.download("punkt_tab")
print(f"Using device: {device}")
print("Loading AI detection models...")
models = {
"Polygraf AI (Base Model)": AutoModelForSequenceClassification.from_pretrained(
"polygraf-ai/bc-roberta-openai-2sent"
).to(device),
"Polygraf AI (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
"polygraf-ai/bc_combined_3sent"
).to(device),
}
tokenizers = {
"Polygraf AI (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
"Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
}
# grammar correction tool
tool = language_tool_python.LanguageTool("en-US")
# source detection model
MC_TOKEN_SIZE = 256
TEXT_MC_MODEL_PATH = "polygraf-ai/mc-model"
MC_LABEL_MAP = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "Grammar Enhancer"]
text_mc_tokenizer = AutoTokenizer.from_pretrained(TEXT_MC_MODEL_PATH)
print("Loading Source detection model...")
text_mc_model = AutoModelForSequenceClassification.from_pretrained(TEXT_MC_MODEL_PATH).to(device)
def generate_cited_html(cited_text, citations: dict):
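    """Render LLM output whose inline citations appear as <N> markers: each marker
    becomes a numbered button with a popup showing the source and cited content,
    and a formatted reference list is appended. Returns an HTML string."""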
cited_text = cited_text.replace("\n", "<br>")
html_code = """
<style>
.reference-container {
position: relative;
display: inline-block;
}
.reference-btn {
display: inline-block;
        width: 20px;
        height: 20px;
border-radius: 50%;
background-color: #e33a89; /* Pink color for the button */
color: white;
text-align: center;
        line-height: 20px;
cursor: pointer;
font-weight: bold;
margin-right: 5px;
transition: background-color 0.3s ease, transform 0.3s ease;
}
.reference-btn:hover {
background-color: #ff69b4; /* Lighter pink on hover */
transform: scale(1.1); /* Slightly enlarge on hover */
}
.reference-popup {
display: none;
position: absolute;
z-index: 1;
top: 100%;
background-color: #f9f9f9;
border: 1px solid #ddd;
padding: 15px;
border-radius: 4px;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
width: calc(min(90vw, 400px));
max-height: calc(min(80vh, 300px));
overflow-y: auto;
}
.reference-popup .close-btn {
float: right;
cursor: pointer;
font-weight: bold;
color: white;
font-size: 16px;
padding: 0;
width: 20px;
height: 20px;
text-align: center;
line-height: 20px;
background-color: #ff4c4c;
border-radius: 2px;
transition: transform 0.3s ease, background-color 0.3s ease;
}
.reference-popup .close-btn:hover {
transform: scale(1.2);
background-color: #ff3333;
}
input[type="radio"] {
position: absolute;
opacity: 0;
pointer-events: none;
}
input[type="radio"]:checked + .reference-popup {
display: block;
}
/* Additional styling for distinct sections */
.reference-popup strong {
font-weight: bold;
color: #333;
display: block;
margin-bottom: 5px;
}
.reference-popup p {
margin: 0 0 10px 0;
padding: 0;
}
.reference-popup .source {
margin-bottom: 10px;
font-size: 14px;
font-weight: bold;
color: #1e90ff;
}
.reference-popup .content {
margin-bottom: 10px;
font-size: 13px;
color: #555;
}
@media (prefers-color-scheme: dark) {
.reference-btn {
background-color: #1e90ff;
}
.reference-popup {
background-color: #2c2c2c;
border-color: #444;
color: #f1f1f1;
}
.reference-popup .close-btn {
background-color: #ff4c4c;
}
.reference-popup .close-btn:hover {
background-color: #ff3333;
}
.reference-popup strong {
color: #ddd;
}
.reference-popup .source {
color: #1e90ff;
}
.reference-popup .content {
color: #bbb;
}
}
</style>
<script>
document.addEventListener('click', (event) => {
const containers = document.querySelectorAll('.reference-container');
containers.forEach(container => {
const rect = container.getBoundingClientRect();
const popup = container.querySelector('.reference-popup');
// Reset alignment
popup.style.left = '';
popup.style.right = '';
const popupWidth = popup.offsetWidth;
const viewportWidth = window.innerWidth;
// If the popup would go off the right edge
if (rect.right + popupWidth > viewportWidth) {
popup.style.right = '0'; // Align popup to the right
}
// If the popup would go off the left edge
else if (rect.left - popupWidth < 0) {
popup.style.left = '0'; // Align popup to the left
}
// Otherwise center it
else {
popup.style.left = '50%';
popup.style.transform = 'translateX(-50%)'; // Center the popup
}
});
});
function closeReferencePanes() {
document.querySelectorAll('input[name="reference"]').forEach((input) => {
input.checked = false;
});
}
</script>
<div style="height: 600px; overflow-y: auto; overflow-x: auto;">
"""
# Function to replace each citation with a reference button
citation_numbers = {}
next_number = 1
citation_count = 0 # To track unique instances of each citation
references = "<b>References:</b><br><br>"
def replace_citations(match):
nonlocal citation_count, next_number, references
citation_id = match.group(1) # Extract citation number from the match
ref_data = citations.get(int(citation_id))
# If reference data is not found, return the original text
if not ref_data:
return match.group(0)
# Getting PDF file from gradio path
if "/var/tmp/gradio/" in ref_data["source"]:
ref_data["source"] = ref_data["source"].split("/")[-1]
# remove new line artifacts from scraping / parsing
ref_data["content"] = ref_data["content"].replace("\n", " ")
# Check if source is a URL, make it clickable if so
if ref_data["source"].startswith("http"):
source_html = f'<a href="{ref_data["source"]}" target="_blank" class="source">{ref_data["source"]}</a>'
else:
source_html = f'<span class="source">{ref_data["source"]}</span>'
if citation_id not in citation_numbers:
citation_numbers[citation_id] = next_number
source = ref_data["source"]
content = ref_data["content"]
references += f"[{next_number}] {source}<br>- {content}<br><br>"
next_number += 1
citation_number = citation_numbers[citation_id]
# Unique id for each reference button and popup
unique_id = f"{citation_id}-{citation_count}"
citation_count += 1
# HTML code for the reference button and popup with formatted content
button_html = f"""
<span class="reference-container">
<label for="ref-toggle-{unique_id}" class="reference-btn" onclick="closeReferencePanes(); document.getElementById('ref-toggle-{unique_id}').checked = true;">{citation_number}</label>
<input type="radio" id="ref-toggle-{unique_id}" name="reference" />
<span class="reference-popup">
<span class="close-btn" onclick="document.getElementById('ref-toggle-{unique_id}').checked = false;">&times;</span>
<strong>Source:</strong> {source_html}
<strong>Content:</strong> <p class="content">{ref_data["content"]}</p>
</span>
</span>
"""
return button_html
# Replace inline citations in the text with the generated HTML
html_code += re.sub(r"<(\d+)>", replace_citations, cited_text)
html_code += "<br><br>" + references
html_code += "</div>"
return html_code
# Function to move model to the appropriate device
def to_device(model):
return model.to(device)
def copy_to_input(text):
return text
def remove_bracketed_numbers(text):
pattern = r"^\[\d+\]"
cleaned_text = re.sub(pattern, "", text)
return cleaned_text
def clean_text(text: str) -> str:
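    """Collapse whitespace within each paragraph, capitalize letters that follow a
    sentence-ending period, and drop empty paragraphs."""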
paragraphs = text.split("\n\n")
cleaned_paragraphs = []
for paragraph in paragraphs:
cleaned = re.sub(r"\s+", " ", paragraph).strip()
cleaned = re.sub(r"(?<=\.) ([a-z])", lambda x: x.group(1).upper(), cleaned)
cleaned_paragraphs.append(cleaned)
cleaned_paragraphs = [item for item in cleaned_paragraphs if item.strip()]
return "\n\n".join(cleaned_paragraphs)
def format_references(text: str) -> str:
body, references = split_text_from_refs(text)
return body + references
def split_text_from_refs(text: str, sep="\n"):
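    """Split `text` into the article body and a trailing "References" section,
    renumbering references as [1], [2], ... Returns (body, formatted_references)."""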
lines = text.split("\n")
references = []
article_text = []
index_pattern = re.compile(r"\[(\d+)\]")
in_references = False
for line in lines:
if line == "":
continue
match = re.search(r"[Rr]eferences:", line, re.DOTALL)
if line.strip().lower() == "references" or line.strip().lower() == "references:":
in_references = True
continue
if line.strip().lower().startswith("references:"):
in_references = True
if match:
in_references = True
line = line[match.end() :]
if in_references:
matches = index_pattern.split(line)
for match in matches:
if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
references.append(match.strip())
else:
article_text.append(line.strip())
if len(references) > 0:
formatted_refs = []
for i, ref in enumerate(references, 1):
ref = remove_bracketed_numbers(ref)
formatted_refs.append(f"[{i}] {ref}{sep}")
formatted_refs = f"{sep}{sep}References:{sep}{sep}" + f"{sep}".join(formatted_refs)
else:
formatted_refs = ""
body = f"{sep}{sep}".join(article_text)
return body, formatted_refs
def ends_with_references(text):
# Define a regular expression pattern for variations of "References:"
pattern = re.compile(r"\b[Rr]eferences:\s*$", re.IGNORECASE | re.MULTILINE)
# Check if the text ends with any form of "References:"
return bool(pattern.search(text.strip()))
def format_and_correct_language_check(text: str) -> str:
return tool.correct(text)
def predict(model, tokenizer, text):
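    """Classify `text` with a binary human/AI detector and return the class
    probabilities as {"HUMAN": p_human, "AI": p_ai}."""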
text = remove_special_characters(text)
bc_token_size = 256
with torch.no_grad():
model.eval()
tokens = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=bc_token_size,
return_tensors="pt",
).to(device)
output = model(**tokens)
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
torch.cuda.empty_cache()
gc.collect()
return output_norm
def ai_generated_test(text, model="Polygraf AI (Base Model)"):
return predict(models[model], tokenizers[model], text)
def detection_polygraf(text, model="Polygraf AI (Base Model)"):
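    """Score `text` sentence by sentence using a sliding window of up to three
    sentences, averaging each sentence's window scores. Returns the overall
    HUMAN/AI probabilities and an HTML rendering in which likely AI sentences
    are highlighted (red >= 0.70, yellow >= 0.55)."""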
# sentences = split_into_sentences(text)
sentences = nltk.sent_tokenize(text)
num_sentences = len(sentences)
scores = defaultdict(list)
overall_scores = []
# Process each chunk of 3 sentences and store the score for each sentence in the chunk
for i in range(num_sentences):
chunk = " ".join(sentences[i : i + 3])
if chunk:
# result = classifier(chunk)
result = ai_generated_test(chunk, model)
score = result["AI"]
for j in range(i, min(i + 3, num_sentences)):
scores[j].append(score)
# Calculate the average score for each sentence and apply color coding
paragraphs = text.split("\n")
paragraphs = [s for s in paragraphs if s.strip()]
colored_paragraphs = []
i = 0
for paragraph in paragraphs:
temp_sentences = nltk.sent_tokenize(paragraph)
colored_sentences = []
for sentence in temp_sentences:
if scores[i]:
avg_score = sum(scores[i]) / len(scores[i])
if avg_score >= 0.70:
colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
elif avg_score >= 0.55:
colored_sentence = f"<span style='background-color:GoldenRod;'>{sentence}</span>"
else:
colored_sentence = sentence
colored_sentences.append(colored_sentence)
overall_scores.append(avg_score)
i = i + 1
combined_sentences = " ".join(colored_sentences)
colored_paragraphs.append(combined_sentences)
    overall_score = sum(overall_scores) / len(overall_scores) if overall_scores else 0.0
overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
return overall_score, "<br><br>".join(colored_paragraphs)
ai_check_options = [
"Polygraf AI (Base Model)",
"Polygraf AI (Advanced Model)",
]
def predict_mc(text):
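    """Return source-model probabilities for `text`, ordered as in MC_LABEL_MAP
    (OpenAI GPT, Mistral, CLAUDE, Gemini, Grammar Enhancer)."""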
with torch.no_grad():
text_mc_model.eval()
tokens = text_mc_tokenizer(
text,
padding="max_length",
truncation=True,
return_tensors="pt",
max_length=MC_TOKEN_SIZE,
).to(device)
output = text_mc_model(**tokens)
output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
torch.cuda.empty_cache()
gc.collect()
return output_norm
def predict_mc_scores(input, bc_score):
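    """Chunk the input, average the per-chunk source-model probabilities, and scale
    them by the overall AI probability from the binary classifier (`bc_score`).
    Returns an empty dict when the text is almost certainly human-written."""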
mc_scores = []
    segments_mc = split_text_allow_complete_sentences_nltk(input, type_det="mc", tokenizer=text_mc_tokenizer)
    samples_len_mc = len(segments_mc)  # reuse the segments rather than re-splitting the text
for i in range(samples_len_mc):
cleaned_text_mc = remove_special_characters(segments_mc[i])
mc_score = predict_mc(cleaned_text_mc)
mc_scores.append(mc_score)
mc_scores_array = np.array(mc_scores)
average_mc_scores = np.mean(mc_scores_array, axis=0)
mc_score_list = average_mc_scores.tolist()
mc_score = {}
for score, label in zip(mc_score_list, MC_LABEL_MAP):
mc_score[label.upper()] = score
sum_prob = 1 - bc_score["HUMAN"]
for key, value in mc_score.items():
mc_score[key] = value * sum_prob
print("MC Score:", mc_score)
if sum_prob < 0.01:
mc_score = {}
return mc_score
def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
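    """Strip citations, separate the reference list, then run AI detection and
    source attribution on the body. Returns (score, highlighted_html, mc_score)."""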
text = remove_citations(text)
body, references = split_text_from_refs(text)
score, text = detection_polygraf(text=body, model=model)
mc_score = predict_mc_scores(body, score) # mc score
text = text + references.replace("\n", "<br>")
return score, text, mc_score
def ai_check(history: list, option: str):
    text = history[-1][1]
    # Both detector options currently route to the Polygraf highlighter
    return highlighter_polygraf(text, option)
def generate_prompt(settings: Dict[str, str]) -> str:
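    """Assemble the article-generation prompt from the user-selected settings."""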
settings["keywords"] = [item for item in settings["keywords"] if item.strip()]
# - Add a "References" section in the format "References:" on a new line after the requested text, formatted as [1], [2], etc. with each source on their own line
prompt = f"""
    Write a {settings['format']} of approximately {settings['article_length']} words on {settings['topic']}.\n
"""
if settings["context"]:
prompt += f"""
Context:
- {settings['context']}
"""
prompt += f"""
Style and Tone:
- Writing style: {settings['writing_style']}
- Tone: {settings['tone']}
- Target audience: {settings['user_category']}
Content:
- Depth: {settings['depth_of_content']}
- Structure: {', '.join(settings['structure'])}
"""
if len(settings["keywords"]) > 0:
prompt += f"""
Keywords to incorporate:
{', '.join(settings['keywords'])}
"""
prompt += f"""
Additional requirements:
    - Do not start with "Here is a..."; begin directly with the requested text
    - End with a {settings['conclusion_type']} conclusion
    - Do not make any headline or title bold.
    - Ensure proper paragraph breaks for better readability.
    - Avoid any references to artificial intelligence or language models, do not reveal that the text is AI-generated, and do not include meta phrases such as "here is the article".
    - Adhere to any format structure provided to the system, if any.
"""
return prompt
def regenerate_prompt(settings: Dict[str, str]) -> str:
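    """Assemble the prompt used to minimally edit an already-generated article
    according to the user's comments."""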
prompt = f"""
I am a {settings['role']}
"{settings['generated_article']}"
Edit the given text based on user comments.
User Comments:
- {settings['user_comments']}
Requirements:
    - Do not start with "Here is a..."; begin directly with the requested text
    - Preserve the original content; make only minor modifications based on the user comments above.
    - Keep the references from the given text unchanged and in the same format.
    - Do not make any headline or title bold.
    Context:
    - {settings['context']}
    Ensure proper paragraph breaks for better readability.
    Avoid any references to artificial intelligence or language models, do not reveal that the text is AI-generated, and do not include meta phrases such as "here is the article".
"""
return prompt
def generate_article(
input_role: str,
topic: str,
context: str,
keywords: str,
article_length: str,
format: str,
writing_style: str,
tone: str,
user_category: str,
depth_of_content: str,
structure: str,
references: str,
num_examples: str,
conclusion_type: str,
ai_model: str,
url_content: str = None,
api_key: str = None,
pdf_file_input: list[str] = None,
generated_article: str = None,
user_comments: str = None,
yt_content: str = None,
) -> tuple:
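    """Build the generation (or regeneration) prompt and call the LLM backend.
    Returns (article, citations)."""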
settings = {
"role": input_role,
"topic": topic,
"context": context,
"keywords": [k.strip() for k in keywords.split(",")],
"article_length": article_length,
"format": format,
"writing_style": writing_style,
"tone": tone,
"user_category": user_category,
"depth_of_content": depth_of_content,
"structure": [s.strip() for s in structure.split(",")],
"references": [r.strip() for r in references.split(",")],
"num_examples": num_examples,
"conclusion_type": conclusion_type,
"generated_article": generated_article,
"user_comments": user_comments,
}
if generated_article:
prompt = regenerate_prompt(settings)
else:
prompt = generate_prompt(settings)
print("Generated Prompt...\n", prompt)
article, citations = generate(
prompt=prompt,
input_role=input_role,
topic=topic,
context=context,
model=ai_model,
url_content=url_content,
path=pdf_file_input,
# path=["./final_report.pdf"], # TODO: reset
temperature=1,
max_length=2048,
api_key=api_key,
sys_message="",
yt_content=yt_content,
)
return article, citations
def get_history(history):
    # Keep only (label, text) pairs; the citations element is not shown in the chatbot
    return [(entry[0], entry[1]) for entry in history]
def clear_history():
# Return empty list for history state and display
return [], []
def humanize(
model: str,
cited_text: str,
temperature: float = 1.2,
repetition_penalty: float = 1,
top_k: int = 50,
length_penalty: float = 1,
history=None,
) -> tuple:
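    """Humanize the most recent generated text and append the result to `history`.
    Note that the `cited_text` argument is ignored; the text and citations are
    taken from the last history entry. Returns (cited_html, history,
    latest_humanizer_data)."""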
print("Humanizing text...")
# body, references = split_text_from_refs(text)
cited_text = history[-1][1]
citations = history[-1][2]
article = humanize_text(
text=cited_text,
model_name=model,
temperature=temperature,
repetition_penalty=repetition_penalty,
top_k=top_k,
length_penalty=length_penalty,
)
# result = result + references
# corrected_text = format_and_correct_language_check(result)
article = clean_text(article)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
history.append((f"Humanized Text | {timestamp}\nInput: {model}", article, citations))
latest_humanizer_data = {
"original text": cited_text,
"humanized text": article,
"citations": citations, # can remove saving citations
"metadata": {
"temperature": temperature,
"repetition_penalty": repetition_penalty,
"top_k": top_k,
"length_penalty": length_penalty,
},
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
}
return generate_cited_html(article, citations), history, latest_humanizer_data
def update_visibility_api(model: str):
if model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]:
return gr.update(visible=True)
else:
return gr.update(visible=False)
# Function to update the default selected structure based on the selected format
def update_structure(format_choice):
# List of formats that should use "Plain Text"
plain_text_formats = [
"TikTok Video Content",
"Instagram Video Content",
"LinkedIn post",
"X (Twitter) post",
"Facebook post",
"Email",
]
# Set the appropriate default structure based on the selected format
if format_choice in plain_text_formats:
return gr.update(value="Plain Text", interactive=True)
else:
return gr.update(value="Introduction, Body, Conclusion", interactive=True)
# Initialize Google Cloud Storage client
client = storage.Client()
bucket_name = "ai-source-detection"
bucket = client.bucket(bucket_name)
def save_to_cloud_storage(
article,
topic,
input_role,
context,
keywords,
article_length,
format,
writing_style,
tone,
user_category,
depth_of_content,
structure,
references,
num_examples,
conclusion_type,
ai_model,
url_content,
generated_article,
user_comments,
timestamp,
):
"""Save generated article and metadata to Google Cloud Storage within a specific folder."""
# Create a unique filename
file_id = str(uuid.uuid4())
# Define the file path and name in the bucket
folder_path = "ai-writer/"
file_name = f"{folder_path}{timestamp.replace(' ', '_').replace(':', '-')}_{file_id}.json"
# Create a dictionary with the article and all relevant metadata
data = {
"article": article,
"metadata": {
"topic": topic,
"input_role": input_role,
"context": context,
"keywords": keywords,
"article_length": article_length,
"format": format,
"writing_style": writing_style,
"tone": tone,
"user_category": user_category,
"depth_of_content": depth_of_content,
"structure": structure,
"references": references,
"num_examples": num_examples,
"conclusion_type": conclusion_type,
"ai_model": ai_model,
"url_content": url_content,
"generated_article": generated_article,
"user_comments": user_comments,
"timestamp": timestamp,
},
}
# Convert data to JSON string
json_data = json.dumps(data)
# Create a blob and upload to GCS
blob = bucket.blob(file_name)
blob.upload_from_string(json_data, content_type="application/json")
return f"Data saved as {file_name} in GCS."
def save_humanizer_feedback_to_cloud_storage(data, humanizer_feedback):
"""Save generated article and metadata to Google Cloud Storage within a specific folder."""
if data:
try:
data["user_feedback"] = humanizer_feedback
# Create a unique filename
file_id = str(uuid.uuid4())
# Define the file path and name in the bucket
folder_path = "ai-writer/humanizer-feedback/"
file_name = f"{folder_path}{data['timestamp'].replace(' ', '_').replace(':', '-')}_{file_id}.json"
# Convert data to JSON string
json_data = json.dumps(data)
# Create a blob and upload to GCS
blob = bucket.blob(file_name)
blob.upload_from_string(json_data, content_type="application/json")
gr.Info("Successfully reported. Thank you for the feedback!")
except Exception:
gr.Warning("Report not saved.")
else:
gr.Warning("Nothing humanized to save yet!")
scholar_urls = [
"arxiv.org",
"aclanthology.org",
"ieeexplore.ieee.org",
"researchgate.net",
# "scholar.google.com",
"springer.com",
# "sciencedirect.com", # 400
# "onlinelibrary.wiley.com", # 400
"jstor.org", # 400
"semanticscholar.org",
"biorxiv.org",
"medrxiv.org",
"ssrn.com",
"pubmed.ncbi.nlm.nih.gov",
"cochranelibrary.com",
]
def generate_and_format(
input_role,
topic,
context,
keywords,
article_length,
format,
writing_style,
tone,
user_category,
depth_of_content,
structure,
references,
num_examples,
conclusion_type,
google_search_check,
scholar_mode_check,
year_from,
month_from,
day_from,
year_to,
month_to,
day_to,
domains_to_include,
include_sites,
exclude_sites,
pdf_file_input,
history=None,
    yt_url: str = None,
    # generated_article and user_comments precede ai_model/api_key so the
    # regenerate handler can supply them positionally as Gradio inputs.
    generated_article: str = None,
    user_comments: str = None,
    ai_model: str = "OpenAI GPT 4o",
    api_key: str = None,
):
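    """End-to-end handler for the Generate and Regenerate buttons: optionally run a
    date- and site-filtered web search, transcribe a YouTube video if a link is
    given, generate the cited article, save it to Cloud Storage (skipped when PDFs
    are uploaded, for privacy), and return the rendered HTML plus updated history."""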
url_content = None
if google_search_check:
gr.Info("Searching internet for relevant content...")
date_from = build_date(year_from, month_from, day_from)
date_to = build_date(year_to, month_to, day_to)
sorted_date = f"date:r:{date_from}:{date_to}"
final_query = llm_wrapper(
input_role, topic, context, model="OpenAI GPT 4o", task_type="internet", temperature=0.7
)
        # Scholar mode delegates scholarly-site filtering to google_search via the
        # scholar_mode_check flag, so no site: operators are added to the query here.
        # scholar_site_queries = [f"site:{site.strip()}" for site in scholar_urls]
        # final_query += " " + " OR ".join(scholar_site_queries)
        if not scholar_mode_check:
            if include_sites:
                site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
                final_query += " " + " OR ".join(site_queries)
            if exclude_sites:
                exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
                final_query += " " + " ".join(exclude_queries)
print(f"Google Search Query: {final_query}")
url_content = google_search(final_query, sorted_date, domains_to_include, scholar_mode_check)
yt_content = {}
if yt_url:
gr.Info("Transcribing YouTube video...")
transcribed_text = transcribe(yt_url)
gr.Info("Transcription completed. Generating article...")
yt_content[yt_url] = transcribed_text
# topic_context = topic + ", " + context
article, citations = generate_article(
input_role,
topic,
context,
keywords,
article_length,
format,
writing_style,
tone,
user_category,
depth_of_content,
structure,
references,
num_examples,
conclusion_type,
ai_model,
url_content,
api_key,
pdf_file_input,
generated_article,
user_comments,
yt_content,
)
# if ends_with_references(article) and url_content is not None:
# for url in url_content.keys():
# article += f"\n{url}"
article = clean_text(display_cited_text(article))
# reference_formatted = format_references(article)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
history.append((f"Generated Text | {timestamp}\nInput: {topic}", article, citations))
# Save the article and metadata to Cloud Storage
    # We don't save when there is PDF input, for privacy reasons
if pdf_file_input is None:
save_message = save_to_cloud_storage(
article,
topic,
input_role,
context,
keywords,
article_length,
format,
writing_style,
tone,
user_category,
depth_of_content,
structure,
references,
num_examples,
conclusion_type,
ai_model,
url_content,
generated_article,
user_comments,
timestamp,
)
print(save_message)
return generate_cited_html(article, citations), history
# def create_interface():
with gr.Blocks(
theme=gr.themes.Default(
primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
),
css="""
.input-highlight-pink block_label {background-color: #008080}
""",
) as demo:
history = gr.State([])
latest_humanizer_data = gr.State()
today = date.today()
    # Today's date as [day, month name, year], used to pre-fill the "To" date fields
    d1 = today.strftime("%d/%B/%Y")
    d1 = d1.split("/")
gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6")
with gr.Row():
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
input_topic = gr.Textbox(
label="Topic",
placeholder="Enter the main topic of your article",
elem_classes="input-highlight-pink",
)
input_context = gr.Textbox(
label="Context",
placeholder="Provide some context for your topic",
elem_classes="input-highlight-pink",
)
input_keywords = gr.Textbox(
label="Keywords",
placeholder="Enter comma-separated keywords",
elem_classes="input-highlight-yellow",
)
with gr.Row():
input_format = gr.Dropdown(
choices=[
"Article",
"Essay",
"Blog post",
"Report",
"Research paper",
"News article",
"White paper",
"Email",
"LinkedIn post",
"X (Twitter) post",
"Instagram Video Content",
"TikTok Video Content",
"Facebook post",
],
value="Article",
label="Format",
elem_classes="input-highlight-turquoise",
)
input_length = gr.Slider(
minimum=50,
maximum=5000,
step=50,
value=300,
label="Article Length",
elem_classes="input-highlight-pink",
)
with gr.Row():
input_writing_style = gr.Dropdown(
choices=[
"Formal",
"Informal",
"Technical",
"Conversational",
"Journalistic",
"Academic",
"Creative",
],
value="Formal",
label="Writing Style",
elem_classes="input-highlight-yellow",
)
input_tone = gr.Dropdown(
choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"],
value="Professional",
label="Tone",
elem_classes="input-highlight-turquoise",
)
input_user_category = gr.Dropdown(
choices=[
"Students",
"Professionals",
"Researchers",
"General Public",
"Policymakers",
"Entrepreneurs",
],
value="General Public",
label="Target Audience",
elem_classes="input-highlight-pink",
)
input_depth = gr.Dropdown(
choices=[
"Surface-level overview",
"Moderate analysis",
"In-depth research",
"Comprehensive study",
],
value="Moderate analysis",
label="Depth of Content",
elem_classes="input-highlight-yellow",
)
input_structure = gr.Dropdown(
choices=[
"Introduction, Body, Conclusion",
"Abstract, Introduction, Methods, Results, Discussion, Conclusion",
"Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion",
"Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion",
"Plain Text",
],
value="Introduction, Body, Conclusion",
label="Structure",
elem_classes="input-highlight-turquoise",
interactive=True,
)
input_references = gr.Dropdown(
choices=[
"Academic journals",
"Industry reports",
"Government publications",
"News outlets",
"Expert interviews",
"Case studies",
],
value="News outlets",
label="References",
elem_classes="input-highlight-pink",
)
input_num_examples = gr.Dropdown(
choices=["1-2", "3-4", "5+"],
value="1-2",
label="Number of Examples/Case Studies",
elem_classes="input-highlight-yellow",
)
input_conclusion = gr.Dropdown(
choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"],
value="Call to Action",
label="Conclusion Type",
elem_classes="input-highlight-turquoise",
)
gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
google_default = False
with gr.Row():
google_search_check = gr.Checkbox(
label="Enable Internet Search For Recent Sources", value=google_default
)
with gr.Group(visible=google_default) as search_options:
with gr.Row():
scholar_mode_check = gr.Checkbox(label="Enable Scholar Mode", value=False)
with gr.Group(visible=True) as site_options:
with gr.Row():
include_sites = gr.Textbox(
label="Include Specific Websites",
placeholder="Enter comma-separated keywords",
elem_classes="input-highlight-yellow",
)
with gr.Row():
exclude_sites = gr.Textbox(
label="Exclude Specific Websites",
placeholder="Enter comma-separated keywords",
elem_classes="input-highlight-yellow",
)
with gr.Row():
domains_to_include = gr.Dropdown(
domain_list,
value=domain_list,
multiselect=True,
label="Domains To Include",
)
with gr.Row():
month_from = gr.Dropdown(
choices=months,
label="From Month",
value="January",
interactive=True,
)
day_from = gr.Textbox(label="From Day", value="01")
year_from = gr.Textbox(label="From Year", value="2000")
with gr.Row():
month_to = gr.Dropdown(
choices=months,
label="To Month",
value=d1[1],
interactive=True,
)
day_to = gr.Textbox(label="To Day", value=d1[0])
year_to = gr.Textbox(label="To Year", value=d1[2])
gr.Markdown("# Add Optional PDF Files with Information", elem_classes="text-center text-3xl mb-6")
pdf_file_input = gr.File(label="Upload PDF(s)", file_count="multiple", file_types=[".pdf"])
gr.Markdown("# Add Youtube Video Link", elem_classes="text-center text-3xl mb-6")
yt_url = gr.Textbox(
label="Youtube Video Link",
placeholder="Enter the link of the video",
elem_classes="input-highlight-pink",
)
"""
# NOTE: HIDE AI MODEL SELECTION
with gr.Group():
gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
ai_generator = gr.Dropdown(
choices=[
"OpenAI GPT 4",
"OpenAI GPT 4o",
"OpenAI GPT 4o Mini",
"Claude Sonnet 3.5",
"Gemini 1.5 Pro",
"LLaMA 3",
],
value="OpenAI GPT 4o Mini",
label="AI Model",
elem_classes="input-highlight-pink",
)
input_api = gr.Textbox(label="API Key", visible=False)
ai_generator.change(update_visibility_api, ai_generator, input_api)
"""
generate_btn = gr.Button("Generate Article", variant="primary")
with gr.Column(scale=2):
with gr.Tab("Text Generator"):
output_article = gr.HTML(
value="""<div style="height: 600px;"></div>""",
label="Generated Article",
)
with gr.Accordion("Regenerate Article", open=False):
ai_comments = gr.Textbox(
label="Add comments to help edit generated text", interactive=True, visible=True
)
regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=True)
ai_detector_dropdown = gr.Dropdown(
choices=ai_check_options, label="Select AI Detector", value="Polygraf AI (Base Model)"
)
ai_check_btn = gr.Button("AI Check")
with gr.Accordion("AI Detection Results", open=True):
ai_check_result = gr.Label(label="AI Check Result")
mc_check_result = gr.Label(label="Creator Check Result")
highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
with gr.Accordion("Advanced Humanizer Settings", open=False):
with gr.Row():
model_dropdown = gr.Radio(
choices=["Advanced Model (Beta)"],
value="Advanced Model (Beta)",
label="Humanizer Model Version",
)
with gr.Row():
temperature_slider = gr.Slider(
minimum=0.1, maximum=2.0, step=0.1, value=1.0, label="Temperature"
)
top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
with gr.Row():
repetition_penalty_slider = gr.Slider(
minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
)
length_penalty_slider = gr.Slider(
minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
)
humanize_btn = gr.Button("Humanize")
with gr.Row(equal_height=False):
with gr.Column():
humanizer_feedback = gr.Textbox(label="Add optional feedback on humanizer")
with gr.Column():
report_humanized_btn = gr.Button("Report Humanized Text", variant="primary", visible=True)
# humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
# copy_to_input_btn = gr.Button("Copy to Input for AI Check")
with gr.Tab("History"):
history_chat = gr.Chatbot(label="Generation History", height=1000)
clear_history_btn = gr.Button("Clear History")
clear_history_btn.click(clear_history, outputs=[history, history_chat])
"""
# NOTE: REMOVED REFRESH BUTTON
refresh_button = gr.Button("Refresh History")
refresh_button.click(get_history, outputs=history_chat)
"""
def regenerate_visible(text):
if text:
return gr.update(visible=True)
else:
return gr.update(visible=False)
def highlight_visible(text):
if text.startswith("Polygraf"):
return gr.update(visible=True)
else:
return gr.update(visible=False)
def search_visible(toggle):
if toggle:
return gr.update(visible=True)
else:
return gr.update(visible=False)
google_search_check.change(
lambda toggle: gr.update(visible=toggle), inputs=google_search_check, outputs=search_options
)
# ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
# output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
# ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
# Update the default structure based on the selected format
# e.g. "Plain Text" for certain formats
input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)
report_humanized_btn.click(
save_humanizer_feedback_to_cloud_storage, inputs=[latest_humanizer_data, humanizer_feedback]
)
generate_btn.click(
fn=generate_and_format,
inputs=[
input_role,
input_topic,
input_context,
input_keywords,
input_length,
input_format,
input_writing_style,
input_tone,
input_user_category,
input_depth,
input_structure,
input_references,
input_num_examples,
input_conclusion,
# ai_generator,
# input_api,
google_search_check,
scholar_mode_check,
year_from,
month_from,
day_from,
year_to,
month_to,
day_to,
domains_to_include,
include_sites,
exclude_sites,
pdf_file_input,
history,
yt_url,
],
outputs=[output_article, history],
)
regenerate_btn.click(
fn=generate_and_format,
inputs=[
input_role,
input_topic,
input_context,
input_keywords,
input_length,
input_format,
input_writing_style,
input_tone,
input_user_category,
input_depth,
input_structure,
input_references,
input_num_examples,
input_conclusion,
# ai_generator,
# input_api,
google_search_check,
scholar_mode_check,
year_from,
month_from,
day_from,
year_to,
month_to,
day_to,
            domains_to_include,
            include_sites,
            exclude_sites,
            pdf_file_input,
            history,
            yt_url,
            output_article,  # passed as generated_article
            ai_comments,  # passed as user_comments
        ],
outputs=[output_article, history],
)
ai_check_btn.click(
fn=ai_check,
inputs=[history, ai_detector_dropdown],
outputs=[ai_check_result, highlighted_text, mc_check_result],
)
humanize_btn.click(
fn=humanize,
inputs=[
model_dropdown,
output_article,
temperature_slider,
repetition_penalty_slider,
top_k_slider,
length_penalty_slider,
history,
],
outputs=[output_article, history, latest_humanizer_data],
)
generate_btn.click(get_history, inputs=[history], outputs=[history_chat])
regenerate_btn.click(get_history, inputs=[history], outputs=[history_chat])
humanize_btn.click(get_history, inputs=[history], outputs=[history_chat])
# return demo
if __name__ == "__main__":
# demo = create_interface()
demo.queue(
max_size=2,
default_concurrency_limit=2,
).launch(server_name="0.0.0.0", share=True, server_port=7890)
# demo.launch(server_name="0.0.0.0")