Spaces:
Running
Running
charlieoneill
committed on
Commit
•
3187d23
1
Parent(s):
5df6c06
yep
Browse files- .gitignore +1 -0
- app.py +1 -129
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
data/
|
app.py
CHANGED
@@ -55,8 +55,6 @@ def download_all_files():
|
|
55 |
|
56 |
# Load configuration and initialize OpenAI client
|
57 |
download_all_files()
|
58 |
-
# config = yaml.safe_load(open('../config.yaml', 'r'))
|
59 |
-
# client = OpenAI(api_key=config['jwu_openai_key'])
|
60 |
|
61 |
# Load the API key from the environment variable
|
62 |
api_key = os.getenv('openai_key')
|
@@ -100,10 +98,6 @@ def load_subject_data(subject):
|
|
100 |
decoder = weights['decoder.weight'].cpu().numpy()
|
101 |
del weights
|
102 |
|
103 |
-
# # Load feature families
|
104 |
-
# with open(families_path, 'r') as f:
|
105 |
-
# feature_families = json.load(f)
|
106 |
-
|
107 |
with open(family_analysis_path, 'r') as f:
|
108 |
family_analysis = json.load(f)
|
109 |
|
@@ -533,11 +527,6 @@ def create_interface():
|
|
533 |
visualize_button = gr.Button("Visualize Feature")
|
534 |
|
535 |
feature_info = gr.Markdown()
|
536 |
-
# abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
537 |
-
# top_abstracts = gr.Dataframe(
|
538 |
-
# headers=["Title", "Activation value"],
|
539 |
-
# interactive=False
|
540 |
-
# )
|
541 |
|
542 |
abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
543 |
top_abstracts = gr.Dataframe(
|
@@ -597,46 +586,6 @@ def create_interface():
|
|
597 |
inputs=[feature_matches, subject],
|
598 |
outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
|
599 |
)
|
600 |
-
# with gr.Row():
|
601 |
-
# feature_search = gr.Textbox(label="Search Feature Labels")
|
602 |
-
# feature_matches = gr.CheckboxGroup(label="Matching Features", choices=[])
|
603 |
-
# visualize_button = gr.Button("Visualize Feature")
|
604 |
-
|
605 |
-
# feature_info = gr.Markdown()
|
606 |
-
|
607 |
-
# abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
608 |
-
# top_abstracts = gr.Dataframe(
|
609 |
-
# headers=["Title", "Activation value"],
|
610 |
-
# datatype=["markdown", "number"],
|
611 |
-
# interactive=False,
|
612 |
-
# wrap=True
|
613 |
-
# )
|
614 |
-
|
615 |
-
# gr.Markdown("## Correlated Features")
|
616 |
-
# with gr.Row():
|
617 |
-
# with gr.Column(scale=1):
|
618 |
-
# gr.Markdown("### Top 5 Correlated Features")
|
619 |
-
# top_correlated = gr.Dataframe(
|
620 |
-
# headers=["Feature", "Cosine similarity"],
|
621 |
-
# interactive=False
|
622 |
-
# )
|
623 |
-
# with gr.Column(scale=1):
|
624 |
-
# gr.Markdown("### Bottom 5 Correlated Features")
|
625 |
-
# bottom_correlated = gr.Dataframe(
|
626 |
-
# headers=["Feature", "Cosine similarity"],
|
627 |
-
# interactive=False
|
628 |
-
# )
|
629 |
-
|
630 |
-
# with gr.Row():
|
631 |
-
# with gr.Column(scale=1):
|
632 |
-
# gr.Markdown("## Top 5 Co-occurring Features")
|
633 |
-
# co_occurring_features = gr.Dataframe(
|
634 |
-
# headers=["Feature", "Co-occurrences"],
|
635 |
-
# interactive=False
|
636 |
-
# )
|
637 |
-
# with gr.Column(scale=1):
|
638 |
-
# gr.Markdown(f"## Activation Value Distribution")
|
639 |
-
# activation_dist = gr.Plot()
|
640 |
|
641 |
with gr.Tab("Feature Families"):
|
642 |
gr.Markdown("# Feature Families")
|
@@ -652,11 +601,7 @@ def create_interface():
|
|
652 |
datatype=["markdown", "number", "number"],
|
653 |
label="Family and Child Features"
|
654 |
)
|
655 |
-
|
656 |
-
# headers=["Feature", "F1 Score", "Pearson Correlation"],
|
657 |
-
# datatype=["str", "number", "number"],
|
658 |
-
# label="Family and Child Features"
|
659 |
-
# )
|
660 |
|
661 |
def search_feature_families(search_text, current_subject):
|
662 |
family_analysis = subject_data[current_subject]['family_analysis']
|
@@ -665,74 +610,6 @@ def create_interface():
|
|
665 |
matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
|
666 |
return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
|
667 |
|
668 |
-
# def visualize_feature_family(selected_families, current_subject):
|
669 |
-
# if not selected_families:
|
670 |
-
# return "Please select a feature family to visualize.", None
|
671 |
-
|
672 |
-
# selected_family = selected_families[0] # Take the first selected family
|
673 |
-
# family_analysis = subject_data[current_subject]['family_analysis']
|
674 |
-
|
675 |
-
# family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
|
676 |
-
# if not family_data:
|
677 |
-
# return "Invalid feature family selected.", None
|
678 |
-
|
679 |
-
# output = f"# {family_data['superfeature']}\n\n"
|
680 |
-
# output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
|
681 |
-
|
682 |
-
# # Create DataFrame
|
683 |
-
# df_data = [
|
684 |
-
# {
|
685 |
-
# "Feature": family_data['superfeature'],
|
686 |
-
# "F1 Score": family_data['family_f1'],
|
687 |
-
# "Pearson Correlation": family_data['family_pearson']
|
688 |
-
# }
|
689 |
-
# ]
|
690 |
-
|
691 |
-
# for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
|
692 |
-
# df_data.append({
|
693 |
-
# "Feature": name,
|
694 |
-
# "F1 Score": f1,
|
695 |
-
# "Pearson Correlation": pearson
|
696 |
-
# })
|
697 |
-
|
698 |
-
# df = pd.DataFrame(df_data)
|
699 |
-
|
700 |
-
# return output, df
|
701 |
-
|
702 |
-
# def visualize_feature_family(selected_families, current_subject):
|
703 |
-
# if not selected_families:
|
704 |
-
# return "Please select a feature family to visualize.", None, "", []
|
705 |
-
|
706 |
-
# selected_family = selected_families[0] # Take the first selected family
|
707 |
-
# family_analysis = subject_data[current_subject]['family_analysis']
|
708 |
-
|
709 |
-
# family_data = next((family for family in family_analysis if family['superfeature'] == selected_family), None)
|
710 |
-
# if not family_data:
|
711 |
-
# return "Invalid feature family selected.", None, "", []
|
712 |
-
|
713 |
-
# output = f"# {family_data['superfeature']}\n\n"
|
714 |
-
# output += f"## Super Reasoning\n{family_data['super_reasoning']}\n\n"
|
715 |
-
|
716 |
-
# # Create DataFrame
|
717 |
-
# df_data = [
|
718 |
-
# {
|
719 |
-
# "Feature": family_data['superfeature'],
|
720 |
-
# "F1 Score": family_data['family_f1'],
|
721 |
-
# "Pearson Correlation": family_data['family_pearson']
|
722 |
-
# }
|
723 |
-
# ]
|
724 |
-
|
725 |
-
# for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
|
726 |
-
# df_data.append({
|
727 |
-
# "Feature": name,
|
728 |
-
# "F1 Score": f1,
|
729 |
-
# "Pearson Correlation": pearson
|
730 |
-
# })
|
731 |
-
|
732 |
-
# df = pd.DataFrame(df_data)
|
733 |
-
|
734 |
-
# return output, df, "", [] # Return empty string for search box and empty list for checkbox
|
735 |
-
|
736 |
def visualize_feature_family(selected_families, current_subject):
|
737 |
if not selected_families:
|
738 |
return "Please select a feature family to visualize.", None, "", []
|
@@ -753,11 +630,6 @@ def create_interface():
|
|
753 |
"F1 Score": round(family_data['family_f1'], 2),
|
754 |
"Pearson Correlation": round(family_data['family_pearson'], 4)
|
755 |
},
|
756 |
-
# {
|
757 |
-
# "Feature": "## Child Features",
|
758 |
-
# "F1 Score": None,
|
759 |
-
# "Pearson Correlation": None
|
760 |
-
# }
|
761 |
]
|
762 |
|
763 |
for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
|
|
|
55 |
|
56 |
# Load configuration and initialize OpenAI client
|
57 |
download_all_files()
|
|
|
|
|
58 |
|
59 |
# Load the API key from the environment variable
|
60 |
api_key = os.getenv('openai_key')
|
|
|
98 |
decoder = weights['decoder.weight'].cpu().numpy()
|
99 |
del weights
|
100 |
|
|
|
|
|
|
|
|
|
101 |
with open(family_analysis_path, 'r') as f:
|
102 |
family_analysis = json.load(f)
|
103 |
|
|
|
527 |
visualize_button = gr.Button("Visualize Feature")
|
528 |
|
529 |
feature_info = gr.Markdown()
|
|
|
|
|
|
|
|
|
|
|
530 |
|
531 |
abstracts_heading = gr.Markdown("## Top 5 Abstracts")
|
532 |
top_abstracts = gr.Dataframe(
|
|
|
586 |
inputs=[feature_matches, subject],
|
587 |
outputs=[feature_info, top_abstracts, top_correlated, bottom_correlated, co_occurring_features, activation_dist, feature_search, feature_matches]
|
588 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
589 |
|
590 |
with gr.Tab("Feature Families"):
|
591 |
gr.Markdown("# Feature Families")
|
|
|
601 |
datatype=["markdown", "number", "number"],
|
602 |
label="Family and Child Features"
|
603 |
)
|
604 |
+
|
|
|
|
|
|
|
|
|
605 |
|
606 |
def search_feature_families(search_text, current_subject):
|
607 |
family_analysis = subject_data[current_subject]['family_analysis']
|
|
|
610 |
matches = [family['superfeature'] for family in family_analysis if search_text.lower() in family['superfeature'].lower()]
|
611 |
return gr.CheckboxGroup(choices=matches[:10]) # Limit to top 10 matches
|
612 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
def visualize_feature_family(selected_families, current_subject):
|
614 |
if not selected_families:
|
615 |
return "Please select a feature family to visualize.", None, "", []
|
|
|
630 |
"F1 Score": round(family_data['family_f1'], 2),
|
631 |
"Pearson Correlation": round(family_data['family_pearson'], 4)
|
632 |
},
|
|
|
|
|
|
|
|
|
|
|
633 |
]
|
634 |
|
635 |
for name, f1, pearson in zip(family_data['feature_names'], family_data['feature_f1'], family_data['feature_pearson']):
|