"""Streamlit app that runs Google Cloud Natural Language entity analysis on user text."""

import json
import urllib.parse
import urllib.request

import pandas as pd
import streamlit as st
from google.cloud import language_v1
from google.oauth2 import service_account


def _has_knowledge_graph_mid(metadata) -> bool:
    """Return True when *metadata* has a 'mid' value containing '/g/' or '/m/'."""
    if "mid" not in metadata:
        return False
    mid = metadata["mid"]
    return "/g/" in mid or "/m/" in mid


def query_knowledge_graph(entity_id: str) -> None:
    """Render a Markdown link that opens Google Search for *entity_id*.

    Despite the name, no API request is made here: the function only builds a
    ``https://www.google.com/search?kgmid=...`` URL and writes it to the page.

    Args:
        entity_id: The entity's 'mid' value (e.g. a string starting with
            '/m/' or '/g/').
    """
    try:
        # Percent-encode anything that could break the URL or the Markdown
        # link syntax; '/' is kept so ordinary mids produce the same URL as before.
        encoded_id = urllib.parse.quote(entity_id, safe="/")
        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
        # Plain Markdown link: no raw HTML is emitted, so unsafe_allow_html is not needed.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:
        # Report rather than propagate, so a failure here does not stop the
        # remaining entities from being rendered by the caller.
        st.write(f"An error occurred: {e}")


def count_entities(entities) -> int:
    """Count entities whose metadata 'mid' contains '/g/' or '/m/'.

    Args:
        entities: Iterable of objects exposing a ``metadata`` mapping.

    Returns:
        The number of matching entities.
    """
    return sum(1 for entity in entities if _has_knowledge_graph_mid(entity.metadata))


def serialize_entity_metadata(metadata) -> dict:
    """Return a plain dict copy of *metadata* with every value converted to ``str``."""
    return {k: str(v) for k, v in metadata.items()}


def export_entities(entities) -> None:
    """Render download buttons that export *entities* as CSV and as JSON.

    Each entity is flattened to a dict with the keys "Name", "Type",
    "Salience Score", "Metadata" and "Mentions".

    Args:
        entities: Iterable of entity objects as returned by ``analyze_entities``.
    """
    entity_list = [
        {
            "Name": entity.name,
            "Type": language_v1.Entity.Type(entity.type_).name,
            "Salience Score": entity.salience,
            "Metadata": serialize_entity_metadata(entity.metadata),
            "Mentions": [mention.text.content for mention in entity.mentions],
        }
        for entity in entities
    ]

    # Convert to a DataFrame for easier export as CSV.
    df = pd.DataFrame(entity_list)

    # Export as CSV.
    csv = df.to_csv(index=False)
    st.download_button(
        label="Export Entities as CSV",
        data=csv,
        file_name="entities.csv",
        mime="text/csv",
    )

    # Export as JSON.
    json_data = json.dumps(entity_list, indent=2)
    st.download_button(
        label="Export Entities as JSON",
        data=json_data,
        file_name="entities.json",
        mime="application/json",
    )


# Sidebar content
st.sidebar.title("About This Tool")
st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
st.sidebar.markdown("### Step-by-Step Guide")
st.sidebar.markdown("""
1. **Open the Tool**: Navigate to the URL where the tool is hosted.
2. **User Input**: Enter the text you want to analyze.
3. **Analyze**: Click the 'Analyze' button.
4. **View Results**: See the identified entities and their details.
5. **Export Entities**: Export the entities as JSON or CSV.
""")

# Header and intro
st.title("Google Cloud NLP Entity Analyzer")
st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
st.write("Entity salience scores are always relative to the analyzed text.")


def _load_service_account_info() -> dict:
    """Read the ``google_nlp`` secret and return it as a dict.

    Accepts the secret either as a JSON string or as a mapping (for example a
    TOML table in the secrets file).
    """
    raw_secret = st.secrets["google_nlp"]
    if isinstance(raw_secret, str):
        return json.loads(raw_secret)
    return dict(raw_secret)


def _render_entity(entity, position: int, total: int) -> None:
    """Write one entity's name, type, salience, metadata and mentions to the page."""
    st.write(f"Entity {position} of {total}")
    st.write(f"Name: {entity.name}")
    st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
    st.write(f"Salience Score: {entity.salience}")

    if entity.metadata:
        st.write("Metadata:")
        st.write(entity.metadata)
        if _has_knowledge_graph_mid(entity.metadata):
            query_knowledge_graph(entity.metadata["mid"])

    if entity.mentions:
        mention_count = len(entity.mentions)
        plural = "s" if mention_count > 1 else ""
        st.write(f"Mentions: {mention_count} mention{plural}")
        st.write("Raw Array:")
        st.write(entity.mentions)

    st.write("---")


def sample_analyze_entities(text_content):
    """Analyze *text_content* with the Cloud Natural Language API and render the results.

    Builds credentials from the ``google_nlp`` secret, sends the text as an
    English plain-text document, then writes a summary heading, one section
    per entity, and the export buttons.

    Args:
        text_content: The text to analyze.
    """
    credentials = service_account.Credentials.from_service_account_info(
        _load_service_account_info(),
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    client = language_v1.LanguageServiceClient(credentials=credentials)

    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": "en",
    }
    encoding_type = language_v1.EncodingType.UTF8
    response = client.analyze_entities(
        request={"document": document, "encoding_type": encoding_type}
    )
    entities = response.entities
    total = len(entities)

    # Count the entities with 'mid' and either '/g/' or '/m/' in their metadata.
    entity_count = count_entities(entities)
    if entity_count == 0:
        summary = "but found no Google Entities"
    elif entity_count == 1:
        summary = "and found 1 Google Entity"
    else:
        summary = f"and found {entity_count} Google Entities"
    st.markdown(f"# We found {total} entities - {summary}")
    st.write("---")

    for position, entity in enumerate(entities, start=1):
        _render_entity(entity, position, total)

    # Add the export functionality.
    # NOTE(review): these buttons are rendered only inside the "Analyze" click
    # branch below; confirm the results survive the rerun that a download
    # click triggers, or persist them in st.session_state.
    export_entities(entities)


# User input for text analysis
user_input = st.text_area("Enter text to analyze")

if st.button("Analyze"):
    # Whitespace-only input is treated as empty instead of being sent to the API.
    if user_input and user_input.strip():
        sample_analyze_entities(user_input)
    else:
        st.warning("Please enter some text to analyze.")