|
|
|
|
|
|
|
|
|
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
|
import nltk
|
|
from nltk import sent_tokenize
|
|
from collections import Counter
|
|
|
|
|
|
|
|
# sent_tokenize needs the Punkt sentence-tokenizer data. NLTK 3.9+ loads it
# from the "punkt_tab" package while earlier releases load "punkt", so both
# are requested to keep the script working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Checkpoint used for every summarization call in this module.
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Shared summarization pipeline, built once at import time and reused by the
# functions below.
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
|
|
|
|
|
|
|
|
def extract_events(text):
    """
    Split the input text into events.

    An "event" is simply one sentence, as segmented by NLTK's
    ``sent_tokenize``. Returns the list of sentence strings.
    """
    return sent_tokenize(text)
|
|
|
|
|
|
|
|
def abstract_events(events):
    """
    Rewrite every event into a short, generalized form.

    Each event (sentence) is passed on its own through the module-level
    summarization pipeline with sampling disabled; the resulting
    ``summary_text`` strings are returned in the same order as ``events``.
    """
    generalized = []
    for sentence in events:
        outputs = summarizer(sentence, max_length=30, min_length=10, do_sample=False)
        # The pipeline yields a list of result dicts; only the first is used.
        generalized.append(outputs[0]['summary_text'])
    return generalized
|
|
|
|
|
|
|
|
def common_events(abstracted_events, *, min_count=2):
    """
    Find the abstracted events that occur repeatedly.

    Events are compared by exact string equality.

    Args:
        abstracted_events: Iterable of abstracted event strings.
        min_count: Minimum number of occurrences for an event to be
            considered common. The default of 2 keeps the original
            "appears more than once" behavior.

    Returns:
        A list of the distinct events seen at least ``min_count`` times,
        ordered by first appearance in ``abstracted_events``.
    """
    occurrences = Counter(abstracted_events)
    # A distinct local name avoids shadowing this function inside its own body.
    return [event for event, count in occurrences.items() if count >= min_count]
|
|
|
|
|
|
|
|
def generate_summary(common_events):
    """
    Generate a concise summary from the most common events.

    Args:
        common_events: List of event strings to condense.

    Returns:
        The ``summary_text`` produced by the module-level summarization
        pipeline for the space-joined events, or an empty string when there
        is nothing to summarize.
    """
    combined_text = " ".join(common_events)

    # With no (or only blank) events there is no content to summarize, so the
    # model is skipped instead of being asked to summarize an empty prompt.
    if not combined_text.strip():
        return ""

    summary = summarizer(combined_text, max_length=100, min_length=50, do_sample=False)[0]['summary_text']
    return summary
|
|
|
|
|
|
|
|
def chain_of_event_prompting(texts):
    """
    Run the complete Chain-of-Event Prompting pipeline over several texts.

    Steps:
    1. Split each text into events (sentences).
    2. Abstract every event with the summarization model.
    3. Keep the abstracted events that recur across the pooled results.
    4. Summarize those recurring events into the final text.

    Returns the summary produced in step 4.
    """
    # Steps 1-2: process the texts one by one and pool the abstracted events.
    pooled_events = [
        abstracted
        for document in texts
        for abstracted in abstract_events(extract_events(document))
    ]

    # Steps 3-4: filter to recurring events, then condense them.
    return generate_summary(common_events(pooled_events))
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo input: three differently worded reports of the same announcement.
    texts = [
        "The company announced a new product line which will be launched next month.",
        "A new product line is being developed by the company, with a launch expected in the near future.",
        "Next month, the company plans to introduce a new series of products to the market.",
    ]

    # Run the full workflow and show the result.
    final_summary = chain_of_event_prompting(texts)
    print("Final Summary:", final_summary)
|
|
|