Spaces:

veeps
/

unstoppable_app

Sleeping

veeps committed on Sep 18

Commit

8375908

•

1 Parent(s): d945549

pulling rff endorsements

Files changed (1) hide show

get_rff_endorsements.py ADDED Viewed

+from bs4 import BeautifulSoup
+import pandas as pd
+# Open and read the HTML file
+with open("rff.html", 'r', encoding='utf-8') as file:
+    html_content = file.read()
+# Parse the HTML content
+soup = BeautifulSoup(html_content, 'html.parser')
+# Find all article elements
+articles = soup.find_all('article', class_='card4')
+# Initialize a list to store the data
+data = []
+# Loop through each article to extract the required information
+for article in articles:
+    # Extract the name from the span with class '-a:1 -as:3 -as:t1'
+    name_span = article.find('span', class_='-a:1 -as:3 -as:t1')
+    name = name_span.text.strip() if name_span else 'N/A'  # Handle cases where the span might not exist
+    # Extract the state from the paragraph with class 'card4-role -t:11'
+    state_paragraph = article.find('p', class_='card4-role -t:11')
+    state = state_paragraph.text.strip() if state_paragraph else 'N/A'  # Handle cases where the paragraph might not exist
+    # Append the extracted data to the list
+    data.append({'Name': name, 'State': state})
+# Create a DataFrame from the data
+df = pd.DataFrame(data)
+df.tail()
+df.to_csv("rff_endorsements.csv", index = False)