Text2Text Generation
fastText
English
File size: 2,234 Bytes
06a6a7a
 
 
 
633b8f2
06a6a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# from docx import Document
# from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
# import torch
# import gradio as gr

# # Load the Word document
# docx_file_path = "Our Leadership.docx"
# doc = Document(docx_file_path)

# # Extract text from the document
# text = ""
# for paragraph in doc.paragraphs:
#     text += paragraph.text + "\n"

# # Save the extracted text to a text file
# txt_file_path = "extracted_text.txt"
# with open(txt_file_path, "w", encoding="utf-8") as file:
#     file.write(text)

# # Load the pre-trained GPT-2 model and tokenizer
# tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# model = GPT2LMHeadModel.from_pretrained("gpt2")

# # Tokenize the training data
# input_ids = tokenizer(text, return_tensors="pt", padding=True, truncation=True)["input_ids"]

# # Define the training arguments
# training_args = TrainingArguments(
#     per_device_train_batch_size=4,
#     num_train_epochs=3,
#     logging_dir='./logs',
# )

# # Define a pass-through data collator (Trainer's data_collator argument is optional; this one returns the features unchanged)
# class DummyDataCollator:
#     def __call__(self, features):
#         return features

# # Define a Trainer instance
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     data_collator=DummyDataCollator(),
#     train_dataset=input_ids
# )

# # Train the model
# trainer.train()

# # Define the chatbot function
# def chatbot(input_text):
#     # Tokenize input text
#     input_ids = tokenizer.encode(input_text, return_tensors="pt")

#     # Generate response from the model
#     output_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
    
#     # Decode the generated response
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
#     return response

# # Create the Gradio interface
# chatbot_interface = gr.Interface(chatbot, "textbox", "textbox", title="Chatbot")

# # Launch the Gradio interface
# chatbot_interface.launch()


import os

# Name of the Word document this script refers to.
docx_file_name = "Our Leadership.docx"

# Directory the process is currently running in; the document path is
# built relative to it.
current_directory = os.getcwd()

# Join the working directory and the document name into one path.
full_file_path = os.path.join(current_directory, docx_file_name)

# Report the resulting path on standard output.
print("File path:", full_file_path)