# pronunciation-scoring / download_models.py
# Kartikeyssj2's picture
# Update download_models.py
# 5d9ed6e verified
# raw
# history blame
# 3.2 kB
import os
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
import nltk
import os
# Directory (relative to the current working directory) that receives the NLTK data
data_dir = 'nltk_data'

# Create the directory in one step. exist_ok=True makes re-runs harmless and
# avoids the gap between checking for the directory and creating it.
os.makedirs(data_dir, exist_ok=True)

# Add the local directory to NLTK's search path for the rest of this process
nltk.data.path.append(data_dir)

# Download the required NLTK resources straight into the local directory
nltk.download('punkt', download_dir=data_dir)
nltk.download('words', download_dir=data_dir)
from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC, DistilBertTokenizer, DistilBertForSequenceClassification
import os
# Output folders for the saved pronunciation and fluency artifacts
pronunciation_model_dir = 'pronunciation_model'
fluency_model_dir = 'fluency_model'

# Both folders are created up front, before any download starts
for _target_dir in (pronunciation_model_dir, fluency_model_dir):
    os.makedirs(_target_dir, exist_ok=True)


def _fetch_and_store(loader, source, target_dir, announcement):
    """Announce, load, and save one pretrained artifact.

    Prints ``announcement``, loads ``source`` through
    ``loader.from_pretrained``, writes the result into ``target_dir`` with
    ``save_pretrained``, and returns the loaded object.
    """
    print(announcement)
    artifact = loader.from_pretrained(source)
    artifact.save_pretrained(target_dir)
    return artifact


# Pronunciation: tokenizer and model are loaded from the same checkpoint
# and saved side by side in pronunciation_model_dir
pronunciation_tokenizer = _fetch_and_store(
    Wav2Vec2Tokenizer,
    "facebook/wav2vec2-base-960h",
    pronunciation_model_dir,
    "Downloading pronunciation tokenizer...",
)
pronunciation_model = _fetch_and_store(
    Wav2Vec2ForCTC,
    "facebook/wav2vec2-base-960h",
    pronunciation_model_dir,
    "Downloading pronunciation model...",
)

# Fluency: the tokenizer and the model are loaded from two different
# identifiers; both are saved into fluency_model_dir
fluency_tokenizer = _fetch_and_store(
    DistilBertTokenizer,
    'distilbert-base-uncased',
    fluency_model_dir,
    "Downloading fluency tokenizer...",
)
fluency_model = _fetch_and_store(
    DistilBertForSequenceClassification,
    "Kartikeyssj2/Fluency_Scoring_V2",
    fluency_model_dir,
    "Downloading fluency model...",
)

print("Download and save completed.")
from sentence_transformers import SentenceTransformer
import os
# Folder the SentenceTransformer model is saved into; created if missing
model_dir = 'content_relevance_model'
os.makedirs(model_dir, exist_ok=True)

# Identifier of the SentenceTransformer checkpoint to fetch
_sentence_model_name = 'sentence-transformers/msmarco-distilbert-cos-v5'

# Load the model, then write it to model_dir
print("Downloading SentenceTransformer model...")
model = SentenceTransformer(_sentence_model_name)
model.save(model_dir)
print("Model downloaded and saved successfully.")
from transformers import BlipProcessor, BlipForConditionalGeneration
import os
# Identifier from which both the processor and the captioning model are loaded
_blip_source = "noamrot/FuseCap"

# The processor and the model each get their own output folder.
# Note: this rebinds the module-level name model_dir.
processor_dir, model_dir = 'blip_processor', 'blip_model'
for _folder in (processor_dir, model_dir):
    os.makedirs(_folder, exist_ok=True)

# Step 1: load the BlipProcessor and save it into processor_dir
print("Downloading BlipProcessor...")
image_captioning_processor = BlipProcessor.from_pretrained(_blip_source)
image_captioning_processor.save_pretrained(processor_dir)
print("BlipProcessor downloaded and saved.")

# Step 2: load the BlipForConditionalGeneration model and save it into model_dir
print("Downloading BlipForConditionalGeneration model...")
image_captioning_model = BlipForConditionalGeneration.from_pretrained(_blip_source)
image_captioning_model.save_pretrained(model_dir)
print("BlipForConditionalGeneration model downloaded and saved.")