veeps committed on
Commit
8375908
1 Parent(s): d945549

pulling rff endorsements

Browse files
Files changed (1) hide show
  1. get_rff_endorsements.py +36 -0
get_rff_endorsements.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Extract endorsement names and states from a saved HTML page into a CSV.

Reads ``rff.html`` from the current working directory, builds one record per
``<article class="card4">`` element, prints the last few records as a quick
sanity check, and writes every record to ``rff_endorsements.csv``.
"""

from bs4 import BeautifulSoup
import pandas as pd

# Placeholder stored when an expected element is absent from a card.
MISSING = 'N/A'

# Output column order; also guarantees a header row when no cards are found.
COLUMNS = ['Name', 'State']


def _text_or_missing(tag):
    """Return the whitespace-stripped text of *tag*, or ``MISSING`` if it is None."""
    if tag is None:
        return MISSING
    return tag.text.strip()


# Open and read the HTML file
with open("rff.html", 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Find all article elements
articles = soup.find_all('article', class_='card4')

# Collect one {'Name': ..., 'State': ...} record per article
data = []
for article in articles:
    # NOTE: a multi-class string passed to ``class_`` is compared against the
    # exact value of the element's class attribute, so these lookups depend on
    # the classes appearing in precisely this order in the saved page.
    name_span = article.find('span', class_='-a:1 -as:3 -as:t1')
    state_paragraph = article.find('p', class_='card4-role -t:11')

    data.append({
        'Name': _text_or_missing(name_span),
        'State': _text_or_missing(state_paragraph),
    })

# Create a DataFrame from the data; explicit columns keep the CSV header
# present even when ``data`` is empty.
df = pd.DataFrame(data, columns=COLUMNS)

# A bare ``df.tail()`` only displays in an interactive session; print it so the
# preview is visible when this file is run as a script.
print(df.tail())

df.to_csv("rff_endorsements.csv", index=False)