Spaces:

dejanseo
/

linkbert

Running

dejanseo committed on May 2

Commit

d598c67

•

1 Parent(s): 7efe121

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -90,20 +90,20 @@ def process_text(inputs: str, confidence_threshold: float):
                 if last_end < subtoken_start:
                     reconstructed_text += chunk[last_end:subtoken_start]
                 if word_data['prediction'] == 1:
-                    reconstructed_text += f"<span style='background-color: rgba(0, 255, 0); display: inline;'>{subtoken_text}</span>"
                 else:
-                    reconstructed_text += subtoken_text
                 last_end = subtoken_end
-                df_data['Word'].append(subtoken_text)
                 df_data['Prediction'].append(word_data['prediction'])
-                df_data['Confidence'].append(word_data['confidence'])
                 df_data['Start'].append(subtoken_start + original_position_offset)
                 df_data['End'].append(subtoken_end + original_position_offset)
             original_position_offset += len(chunk) + 1
-        reconstructed_text += chunk[last_end:]
     df_tokens = pd.DataFrame(df_data)
     return reconstructed_text, df_tokens

                 if last_end < subtoken_start:
                     reconstructed_text += chunk[last_end:subtoken_start]
                 if word_data['prediction'] == 1:
+                    reconstructed_text += f"<span style='background-color: rgba(0, 255, 0); display: inline;'>{subtoken_text.replace('$', '\\$')}</span>"
                 else:
+                    reconstructed_text += subtoken_text.replace('$', '\\$')
                 last_end = subtoken_end
+                df_data['Word'].append(subtoken_text.replace('$', '\\$'))
                 df_data['Prediction'].append(word_data['prediction'])
+                df_data['Confidence'].append(word_info[word_start]['confidence'])
                 df_data['Start'].append(subtoken_start + original_position_offset)
                 df_data['End'].append(subtoken_end + original_position_offset)
             original_position_offset += len(chunk) + 1
+        reconstructed_text += chunk[last_end:].replace('$', '\\$')
     df_tokens = pd.DataFrame(df_data)
     return reconstructed_text, df_tokens