YchKhan committed on
Commit
e46c1c6
0 Parent(s):

Duplicate from OrganizedProgrammers/SEPredictor

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +65 -0
  4. ebd4appdom.xlsx +3 -0
  5. requirements.txt +7 -0
  6. templates/index.html +95 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ ebd4appdom.xlsx filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SEPredictor
3
+ emoji: ⚡
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.29.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: OrganizedProgrammers/SEPredictor
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, jsonify, request, render_template
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import torch
6
+ import re
7
+
8
# Flask application object; the '/' route defined further down is registered on it.
app = Flask(__name__)
9
+
10
+
11
+
12
# Matches one decimal number: optional sign, digits with an optional
# fractional part (or a bare ".5"), and an optional exponent whose sign may
# be "+" or "-".  Compiled once because it is applied to every row.
_FLOAT_PATTERN = re.compile(r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?')


def extract_embeddings(embeddings_str):
    """Parse every number found in *embeddings_str* into a list of floats.

    The numbers are returned in the order they appear; any text between
    them (brackets, commas, whitespace, ...) is ignored.

    Args:
        embeddings_str: Text containing the serialized numbers.

    Returns:
        A list of floats, empty when the text contains no number.

    Raises:
        TypeError: If *embeddings_str* is not a string (raised by ``re``).
    """
    # The exponent sign must accept "+" as well as "-": otherwise a value
    # such as "1.5e+02" is split into the two numbers 1.5 and 2.0.
    return [float(token) for token in _FLOAT_PATTERN.findall(embeddings_str)]
16
+
17
# Load the reference corpus and the sentence encoder once, at import time,
# so that requests only have to encode the query text.
df = pd.read_excel("ebd4appdom.xlsx")
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# The 'Embeddings' column holds text; turn each cell into a list of floats.
df['Embeddings'] = df['Embeddings'].apply(extract_embeddings)

# Build the corpus tensor a single time.  util.cos_sim accepts a tensor
# directly; handing it a list of lists makes it rebuild the tensor from
# Python floats on every call.
# NOTE(review): assumes every row parses to the same number of values --
# torch.tensor raises here, at startup, if the rows are ragged.
descriptions_embeddings = torch.tensor(list(df['Embeddings']), dtype=torch.float32)

# Parallel per-row columns, indexed by the row positions returned by topk.
patnums = list(df["Number"])
standards = list(df["Standards"])
urls = list(df["URL"])
descriptions = list(df["Description"])
25
+
26
def split_string(s, max_len, overlap, min_words_count=0):
    """Split *s* into overlapping chunks of whitespace-separated words.

    Consecutive chunks hold ``max_len`` words each and share ``overlap``
    words; the last chunk holds whatever words remain.  Chunks are joined
    back with single spaces.

    Args:
        s: Text to split.
        max_len: Number of words per chunk (except possibly the last one).
        overlap: Number of words repeated at the start of the next chunk.
        min_words_count: Chunks with this many words or fewer are dropped.

    Returns:
        The list of chunk strings, in order; empty when *s* has no words.

    Raises:
        ValueError: If the text needs more than one chunk but
            ``overlap >= max_len``, because the window could never advance.
    """
    words = s.split()
    step = max_len - overlap
    # Without this guard the loop below would never terminate: the window
    # start would stay put (or move backwards) on every iteration.
    if step <= 0 and len(words) > max_len:
        raise ValueError("overlap must be smaller than max_len")

    chunks = []
    start = 0
    while start + max_len < len(words):
        chunks.append(words[start:start + max_len])
        start += step
    # Whatever is left (at most max_len words) forms the final chunk.
    chunks.append(words[start:])

    return [" ".join(chunk) for chunk in chunks if len(chunk) > min_words_count]
41
+
42
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the search page and, on POST, the most similar descriptions.

    On GET the template is rendered with ``results=None``.  On POST the
    submitted ``query`` form field is split into word chunks; each chunk is
    encoded and compared against the pre-computed description embeddings.

    ``results`` holds one list per chunk: its first element is
    ``[chunk_text, 'sample<N>']`` and the following elements are dicts with
    the keys ``score``, ``standards``, ``desc`` and ``url`` for the best
    matches, ordered as returned by ``torch.topk``.
    """
    if request.method != 'POST':
        return render_template('index.html', results=None)

    query = request.form['query']
    user_samples = split_string(query, 80, 3)
    # Never ask topk for more rows than the corpus holds.
    top_k = min(5, len(descriptions))

    results = []
    for sample_number, user_sample in enumerate(user_samples):
        sample_embedding = embedder.encode(user_sample, convert_to_tensor=True)
        cos_scores = util.cos_sim(sample_embedding, descriptions_embeddings)[0]
        top_scores, top_indices = torch.topk(cos_scores, top_k)

        sample_results = [[user_sample, 'sample' + str(sample_number)]]
        # tolist() yields plain floats/ints, so the Python lists below are
        # indexed with real ints instead of 0-dim tensors.
        for score, idx in zip(top_scores.tolist(), top_indices.tolist()):
            sample_results.append(dict(
                score=round(score, 4),
                standards=standards[idx],
                desc=descriptions[idx],
                url=urls[idx],
            ))
        results.append(sample_results)

    return render_template('index.html', results=results)
63
+
64
# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    # Bind to every network interface on port 7860.
    app.run(host="0.0.0.0", port=7860)
ebd4appdom.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00f2818e0c2e19382d6d180b020c4e8a03b681f5b7d8afb8cf39b620b5faea6
3
+ size 332417405
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ torch>=1.6
5
+ flask
6
+ sentence-transformers
7
+ openpyxl
templates/index.html ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>SEPredictor</title>
    {# NOTE(review): the package version in both CDN URLs was unreadable in
       the source ("[email protected]").  5.3.0-alpha2 is assumed from the
       integrity hashes and the use of bg-body-tertiary; confirm before
       deploying, since a wrong version fails the integrity check. #}
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-aFq/bzH65dt+w6FI2ooMVUpc+21e0SRygnTpmBvdBgSdnuTN7QbdgL+OapgHtvPp" crossorigin="anonymous">
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha2/dist/js/bootstrap.bundle.min.js" integrity="sha384-qKXV1j0HvMUeCBQ+QVp7JcfGl760yU08IQ+GpUo5hlbpg51QRiuqHAJz8+BrxE/N" crossorigin="anonymous"></script>
    <style>
      body {
        background-color: #EEEEEE;
      }
      form {
        padding: 5em 10em;
      }
      .btn-primary {
        font-size: 1.2em;
        padding: 0em 7em;
        display: block;
        margin: 0 auto;
      }
      .similarsamples {
        padding: 2em 2em;
      }
      .navbar {
        position: fixed;
        top: 0;
        width: 100%;
        z-index: 1;
      }
    </style>
  </head>

  <body>
    <nav class="navbar bg-body-tertiary">
      <div class="container-fluid">
        {# Site-relative link: works on whatever host and port serve the app. #}
        <a class="navbar-brand" href="/">SEPredictor</a>
      </div>
    </nav>

    <form method="POST" action="/">
      <div class="mb-3">
        <label for="query">Enter your patent description:</label>
        <textarea id="query" name="query" class="form-control" rows="10"></textarea>
      </div>
      <input type="submit" value="Search similar patents" class="btn btn-primary mb-3">
    </form>

    <div class="similarsamples">
      {% if results %}
      <h2>Description Samples:</h2>
      {# A single accordion wraps all samples so its id is unique on the page. #}
      <div class="accordion accordion-flush" id="accordionFlushExample">
        {% for result in results %}
        {# result[0] is [sample text, collapse id]; result[1:] are the matches. #}
        <div class="accordion-item">
          <h2 class="accordion-header">
            <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#{{ result[0][1] }}" aria-expanded="false" aria-controls="{{ result[0][1] }}">
              <p>{{ result[0][0] }}</p>
            </button>
          </h2>
          <div id="{{ result[0][1] }}" class="accordion-collapse collapse" data-bs-parent="#accordionFlushExample">
            <div class="accordion-body">
              <table class="table table-light table-striped">
                <thead>
                  <tr>
                    <th scope="col">Score</th>
                    <th scope="col">Sample</th>
                    <th scope="col">Standards</th>
                    <th scope="col">Document</th>
                  </tr>
                </thead>
                <tbody>
                  {# Iterate over the matches actually present instead of assuming five. #}
                  {% for match in result[1:] %}
                  <tr>
                    <th scope="row">{{ match['score'] }}</th>
                    <td>{{ match['desc'] }}</td>
                    <td>{{ match['standards'] }}</td>
                    <td><a href="{{ match['url'] }}">Open</a></td>
                  </tr>
                  {% endfor %}
                </tbody>
              </table>
            </div>
          </div>
        </div>
        {% endfor %}
      </div>
      {% endif %}
    </div>
  </body>
</html>