VerVelVel committed on
Commit
087390d
1 Parent(s): af91aff

for logreg custom class

models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (148 Bytes)
models/model1/Custom_class.py ADDED
@@ -0,0 +1,40 @@
+ import re
+ import pandas as pd
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.decomposition import TruncatedSVD
+ from sklearn.pipeline import Pipeline, FeatureUnion
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.preprocessing import Normalizer
+ import joblib
+ import nltk
+ from nltk.corpus import stopwords
+ from pymorphy2 import MorphAnalyzer
+ import string
+
+ nltk.download('stopwords')
+ nltk.download('punkt')
+
+ class TextPreprocessor(BaseEstimator, TransformerMixin):
+     def __init__(self):
+         self.stop_words = set(stopwords.words('russian'))
+         self.morph = MorphAnalyzer()
+
+     def preprocess_text(self, text):
+         # Remove everything except letters, digits, punctuation, and whitespace
+         clean_pattern = re.compile(r'[^a-zA-Zа-яА-ЯёЁ0-9.,!?;:\s]')
+         text = clean_pattern.sub('', text)
+         url_pattern = re.compile(r'http\S+|www\S+|https\S+')
+         text = url_pattern.sub(r'', text)
+         text = text.translate(str.maketrans('', '', string.punctuation))
+         text = text.lower()
+         tokens = text.split()
+         lemmatized_text = ' '.join([self.morph.parse(word)[0].normal_form for word in tokens if word not in self.stop_words])
+         return lemmatized_text
+
+     def fit(self, X, y=None):
+         return self
+
+     def transform(self, X, y=None):
+         return X.apply(self.preprocess_text)
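
The imports at the top of Custom_class.py (TfidfVectorizer, TruncatedSVD, FeatureUnion, Normalizer, LogisticRegression, joblib) suggest that TextPreprocessor is meant to be the first step of the pickled logistic-regression pipeline loaded in pages/policlinic.py. A minimal training-side sketch of how such a pipeline could be assembled and saved follows; the step names, hyperparameters, and training data are assumptions, not taken from this commit.

# Hypothetical training script: builds a text-classification pipeline around
# TextPreprocessor and serializes it. Step names and parameters are assumptions.
import joblib
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer
from sklearn.linear_model import LogisticRegression

from models.model1.Custom_class import TextPreprocessor

pipeline = Pipeline([
    ('preprocess', TextPreprocessor()),          # clean, lemmatize, drop stop words
    ('tfidf', TfidfVectorizer()),                # sparse TF-IDF features
    ('svd', TruncatedSVD(n_components=100)),     # dense low-rank projection
    ('norm', Normalizer()),                      # re-normalize rows after SVD
    ('clf', LogisticRegression(max_iter=1000)),  # final classifier
])

# X: pd.Series of raw review strings, y: 0/1 labels (hypothetical data)
# pipeline.fit(X, y)
# joblib.dump(pipeline, 'models/model1/logistic_regression_pipeline.pkl')

Importing TextPreprocessor from models.model1.Custom_class at training time, rather than defining it inside the training script, keeps the pickled class reference resolvable from the Streamlit app.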
models/model1/__init__.py ADDED
File without changes
models/model1/__pycache__/Custom_class.cpython-310.pyc ADDED
Binary file (2.13 kB)
models/model1/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (155 Bytes)
models/model1/model_weights.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:da7fd2151d6a5446fc178462ff93ee61c24f98cb0aa41343e2e8c36802e2170b
- size 47712485
models/model2/__init__.py ADDED
File without changes
models/model2/model.py CHANGED
@@ -1,3 +1,8 @@
+ import torch
+ from torch import nn
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+
  class BERTClassifier(nn.Module):
      def __init__(self):
          super().__init__()
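
This hunk only shows the first three lines of BERTClassifier; the rest of the class is outside the diff. The added imports indicate it is an nn.Module wrapping a Hugging Face sequence-classification model. Below is a generic, purely illustrative sketch of that pattern; the checkpoint name, label count, and method names are assumptions and do not describe this repository's actual class.

# Illustrative only: a common way to wrap a Hugging Face classifier in nn.Module.
# The checkpoint and num_labels below are assumptions, not taken from model.py.
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification


class BertWrapperSketch(nn.Module):
    def __init__(self, checkpoint='cointegrated/rubert-tiny2', num_labels=2):
        super().__init__()
        # Tokenizer converts raw strings into input_ids / attention_mask tensors.
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # The sequence-classification head already outputs num_labels logits.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            checkpoint, num_labels=num_labels
        )

    @torch.no_grad()
    def predict(self, texts):
        batch = self.tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
        logits = self.model(**batch).logits
        return logits.argmax(dim=-1).tolist()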
pages/comments.py ADDED
File without changes
pages/policlinic.py CHANGED
@@ -1,9 +1,10 @@
  import streamlit as st
  import joblib
  import pandas as pd
+ from models.model1.Custom_class import TextPreprocessor

  # Load the trained pipeline
- pipeline = joblib.load('logistic_regression_pipeline.pkl')
+ pipeline = joblib.load('models/model1/logistic_regression_pipeline.pkl')

  # Streamlit application
  st.title('Классификация отзывов на русском языке')
@@ -13,3 +14,4 @@ input_text = st.text_area('Введите текст отзыва')
  if st.button('Предсказать'):
      prediction = pipeline.predict(pd.Series([input_text]))
      st.write(f'Предсказанный класс с помощью логрег: {prediction[0]}')
+     st.write(f'1 - negative, 0 - positive')
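
The two functional changes here are related: the pipeline path now points inside models/model1/, and TextPreprocessor is imported before joblib.load. The import matters because joblib/pickle stores only a module-path reference to the custom transformer, not its source code, so the class must be importable in the loading process. A minimal load-side sketch, assuming the repository layout shown in this commit and that the app runs from the repo root:

# Minimal load-side sketch (assumed layout: run from the repository root).
import joblib
import pandas as pd

# The pickled pipeline references models.model1.Custom_class.TextPreprocessor by
# name; importing it here makes the dependency explicit and fails fast if the
# module has been moved or renamed.
from models.model1.Custom_class import TextPreprocessor  # noqa: F401

pipeline = joblib.load('models/model1/logistic_regression_pipeline.pkl')

# Hypothetical review text, mirroring the Streamlit page's usage.
sample = pd.Series(['Очень долго ждали приёма'])  # "We waited a very long time for our appointment"
print(pipeline.predict(sample)[0])  # 1 - negative, 0 - positive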