Spaces:
Build error
Build error
# -*- coding: utf-8 -*- | |
import json | |
import os | |
import re | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from io import StringIO | |
from App4api.bin import constants | |
from collections import OrderedDict | |
from App4api.bin.InformationExtractor import InformationExtractor | |
from App4api.bin.ParameterExtractor import ParameterExtractor | |
from App4api.bin.TechnologyFinder import TechnologyFinder | |
class PGProcessor(object):
    """Build a "problem graph" from a corpus of JSON-encoded patents.

    Each patent description is handed to ``InformationExtractor``; the JSON
    fragments it returns are assembled into a single
    ``{"problem_graph": [...]}`` document that is written to disk and
    returned to the caller.
    """

    def __init__(self, patents, input_folder, file_extension):
        """Store the corpus and the settings forwarded to the extractor.

        Args:
            patents: Iterable of JSON strings, one per patent. Each document
                is read for the keys ``number``, ``claims``, ``description``
                and ``source``.
            input_folder: Path of the corpus folder. Its last component names
                the output sub-folder under ``constants.GRAPH_FOLDER``.
            file_extension: Passed through unchanged to
                ``InformationExtractor``.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    @staticmethod
    def _count_concept_types(concepts_json):
        """Return ``(problems, partial_solutions)`` found in the graph.

        Every element of every top-level list is expected to be a mapping
        whose values are attribute dicts; an attribute dict whose ``"type"``
        is ``"problem"`` or ``"partialSolution"`` is counted, anything else
        is ignored.
        """
        problems = 0
        partial_solutions = 0
        for elements in concepts_json.values():
            for element in elements:
                for attributes in element.values():
                    concept_type = attributes.get("type")
                    if concept_type == "partialSolution":
                        partial_solutions += 1
                    elif concept_type == "problem":
                        problems += 1
        return problems, partial_solutions

    def process_corpus(self):
        """Extract concepts from every patent and write the problem graph.

        Patents with a non-empty description are analysed with
        ``InformationExtractor``. Patents that only have claims, or neither
        description nor claims, are merely counted. The resulting graph is
        written to ``<constants.GRAPH_FOLDER><project_folder>/graph.json`` and
        a summary is printed.

        Returns:
            dict: The parsed graph, ``{"problem_graph": [...]}``.

        Raises:
            KeyError: If a patent lacks one of the keys read here.
            ValueError: If a patent, or the assembled graph, is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError``).
            IOError/OSError: If the output file cannot be opened, e.g. the
                graph folder does not exist.
        """
        input_folder = self.input_folder
        file_extension = self.file_extension
        project_folder = os.path.basename(os.path.normpath(input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        count_abstract = 0
        count_claims = 0
        count_description = 0
        count_patent = 0
        total_sentences_number = 0
        extracted_concepts = []

        for patent_file in self.patents:
            patent = json.loads(patent_file)
            number = patent['number']
            claims = patent['claims']
            description = patent['description']
            source = patent['source']

            if description != "":
                count_description += 1
                extractor = InformationExtractor(
                    description, input_folder, file_extension, number, source
                )
                # Keep the per-patent count in its own variable: unpacking
                # straight into the running total would overwrite it, and
                # adding the total to itself would merely double the last
                # patent's figure.
                output_json, sentences_number = extractor.get_from_description()
                if output_json != "":
                    extracted_concepts.append(output_json)
                total_sentences_number += sentences_number
            elif claims != "":
                count_claims += 1
                print('Processing claims')
            else:
                count_abstract += 1
                print("processing abstract")
            count_patent += 1

        # The extractor returns ready-made JSON text, so the fragments are
        # spliced into the enclosing document as text and parsed once.
        output_result = '{"problem_graph": [%s]}' % ','.join(extracted_concepts)
        concepts_json = json.loads(output_result)
        count_concepts = len(concepts_json['problem_graph'])
        count_concepts_problem, count_concepts_solupart = (
            self._count_concept_types(concepts_json)
        )

        json_write_to_file = json.dumps(
            concepts_json, sort_keys=False, indent=4, separators=(',', ': ')
        )
        with open(graph_folder + "graph.json", 'w') as json_graph:
            json_graph.write(json_write_to_file)

        print("Le corpus contenait %s brevets dont %s abstract, %s revendications et %s descriptions" % (count_patent, count_abstract, count_claims, count_description))
        print("%s phrases ont été analysée(s)" % (total_sentences_number))
        print("%s concepts ont été trouvé(s) dont %s problèmes et %s solutions partielles" % (count_concepts, count_concepts_problem, count_concepts_solupart))

        # NOTE: rendering of the problem / partial-solution ratio chart via
        # make_graphic is currently disabled.
        return concepts_json