PatentSolver

Build error

App Files Files Community

PatentSolver / App /bin /PGProcessor.py

RedBaron5

Duplicate from xin/PatentSolver

c13b805 almost 2 years ago

raw

history blame contribute delete

4.34 kB

	# -*- coding: utf-8 -*-

	import json
	import os
	import re
	import matplotlib.pyplot as plt
	import numpy as np
	from io import StringIO
	from App4api.bin import constants
	from collections import OrderedDict
	from App4api.bin.InformationExtractor import InformationExtractor
	from App4api.bin.ParameterExtractor import ParameterExtractor
	from App4api.bin.TechnologyFinder import TechnologyFinder

	class PGProcessor(object):
	    """Run concept extraction over a corpus of patents and write the graph.

	    Each patent is handed to ``InformationExtractor``; the concepts it returns
	    are merged into a single ``{"problem_graph": [...]}`` document which is
	    written to ``graph.json`` and returned to the caller.
	    """

	    def __init__(self, patents, input_folder, file_extension):
	        """Store the corpus and the settings forwarded to the extractor.

	        Args:
	            patents: Iterable of JSON strings, one per patent. Each document
	                must provide the keys ``number``, ``claims``, ``description``
	                and ``source``.
	            input_folder: Path of the corpus folder; its last component names
	                the sub-folder of ``constants.GRAPH_FOLDER`` that receives
	                ``graph.json``.
	            file_extension: Passed through unchanged to
	                ``InformationExtractor``.
	        """
	        self.patents = patents
	        self.input_folder = input_folder
	        self.file_extension = file_extension
	        print("Processing started")

	    @staticmethod
	    def _build_graph(extracted_concepts):
	        """Wrap the extracted JSON fragments into one parsed graph document.

	        Fragments are spliced in as text rather than parsed one by one, so a
	        fragment that is itself a comma-separated run of JSON objects is
	        accepted.

	        Raises:
	            json.JSONDecodeError: If the assembled text is not valid JSON.
	        """
	        return json.loads('{"problem_graph": [%s]}' % ','.join(extracted_concepts))

	    @staticmethod
	    def _count_concept_types(concepts_json):
	        """Return ``(problems, partial_solutions)`` counted over the graph."""
	        problems = 0
	        partial_solutions = 0
	        for elements in concepts_json.values():
	            for element in elements:
	                for attributes in element.values():
	                    concept_type = attributes.get("type")
	                    if concept_type == "partialSolution":
	                        partial_solutions += 1
	                    elif concept_type == "problem":
	                        problems += 1
	        return problems, partial_solutions

	    def process_corpus(self):
	        """Extract concepts from every patent and persist the resulting graph.

	        Only patent descriptions are actually analysed; patents that have no
	        description are merely counted as "claims" or "abstract" patents.

	        Returns:
	            dict: The parsed graph, ``{"problem_graph": [...]}``.

	        Raises:
	            KeyError: If a patent document lacks one of the required keys.
	            json.JSONDecodeError: If a patent, or the assembled graph, is not
	                valid JSON.
	            OSError: If the graph folder or file cannot be written.
	        """
	        input_folder = self.input_folder
	        file_extension = self.file_extension
	        project_folder = os.path.basename(os.path.normpath(input_folder))
	        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

	        count_abstract = 0
	        count_claims = 0
	        count_description = 0
	        count_patent = 0
	        total_sentences_number = 0
	        extracted_concepts = []

	        for patent_file in self.patents:
	            patent = json.loads(patent_file)
	            number = patent['number']
	            claims = patent['claims']
	            description = patent['description']
	            source = patent['source']

	            if description != "":
	                count_description += 1
	                extractor = InformationExtractor(
	                    description, input_folder, file_extension, number, source)
	                # The second value is presumably the sentence count of this
	                # patent alone (confirm against InformationExtractor). It is
	                # kept in its own name so the corpus-wide total is accumulated
	                # instead of being overwritten and then doubled.
	                output_json, sentences_number = extractor.get_from_description()
	                if output_json != "":
	                    extracted_concepts.append(output_json)
	                total_sentences_number += sentences_number
	            elif claims != "":
	                count_claims += 1
	                print('Processing claims')
	            else:
	                count_abstract += 1
	                print("processing abstract")
	            count_patent += 1

	        concepts_json = self._build_graph(extracted_concepts)
	        count_concepts = len(concepts_json['problem_graph'])
	        count_concepts_problem, count_concepts_solupart = (
	            self._count_concept_types(concepts_json))

	        json_write_to_file = json.dumps(
	            concepts_json, sort_keys=False, indent=4, separators=(',', ': '))
	        # Create the project's graph folder on first use so the write below
	        # does not fail with FileNotFoundError.
	        os.makedirs(graph_folder, exist_ok=True)
	        with open(graph_folder + "graph.json", 'w', encoding="utf-8") as json_graph:
	            json_graph.write(json_write_to_file)

	        print("Le corpus contenait %s brevets dont %s abstract, %s revendications et %s descriptions" % (count_patent, count_abstract, count_claims, count_description))
	        print("%s phrases ont été analysée(s)" % (total_sentences_number))
	        print("%s concepts ont été trouvé(s) dont %s problèmes et %s solutions partielles" % (count_concepts, count_concepts_problem, count_concepts_solupart))

	        return concepts_json