Update scrape_3gpp.py
Browse files
- scrape_3gpp.py +17 -3
scrape_3gpp.py
CHANGED
@@ -428,9 +428,23 @@ def extractionPrincipale(url, excel_file=None, status_list=None, progress=gr.Pro
|
|
428 |
|
429 |
# After processing all files and directories
|
430 |
# Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
|
431 |
-
|
432 |
-
|
433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
435 |
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
436 |
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|
|
|
428 |
|
429 |
# After processing all files and directories
|
430 |
# Read the guide.xlsx file into a DataFrame to map 'TDoc' to 'Source'
|
431 |
+
guide_df = None
|
432 |
+
|
433 |
+
# Attempt to load the guide.xlsx file if it exists
|
434 |
+
guide_file_path = 'guide.xlsx'
|
435 |
+
if os.path.exists(guide_file_path):
|
436 |
+
guide_df = pd.read_excel(guide_file_path, usecols=['Source', 'TDoc', 'TDoc Status'])
|
437 |
+
else:
|
438 |
+
print(f"Warning: {guide_file_path} not found.")
|
439 |
+
|
440 |
+
# Proceed with the rest of the function, ensuring guide_df is checked before use
|
441 |
+
if guide_df is not None:
|
442 |
+
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
443 |
+
# Use tdoc_source_map as needed
|
444 |
+
else:
|
445 |
+
print("Error: guide_df is not initialized. Exiting function.")
|
446 |
+
return
|
447 |
+
|
448 |
tdoc_source_map = {row['TDoc']: row['Source'] for index, row in guide_df.iterrows()}
|
449 |
tdoc_status_map = {row['TDoc']: row['TDoc Status'] for index, row in guide_df.iterrows()}
|
450 |
# Update the 'Source' in your data based on matching 'Nom du fichier' with 'TDoc'
|