Spaces:

crimeacs
/

phase-hunter

Runtime error

App Files Files Community

crimeacs committed on Apr 5, 2023

Commit

6767598

•

1 Parent(s): 66ff4f5

fixed a bug with uncertainties

Browse files

Files changed (1) hide show

app.py +70 -41

app.py CHANGED Viewed

@@ -114,7 +114,7 @@ def variance_coefficient(residuals):
     coeff = 1 - (var / (residuals.max() - residuals.min()))
     return coeff
-def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source_depth_km, velocity_model, max_waveforms):
     distances, t0s, st_lats, st_lons, waveforms, names = [], [], [], [], [], []
     taup_model = TauPyModel(model=velocity_model)
@@ -137,6 +137,7 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
                             minlongitude=(eq_lon-window), maxlongitude=(eq_lon+window),
                             level='station')
         print('Finished downloading inventory')
     except (IndexError, FDSNNoDataException, FDSNTimeoutException, FDSNInternalServerException):
         fig, ax = plt.subplots()
         ax.text(0.5,0.5,'Something is wrong with the data provider, try another')
@@ -149,7 +150,6 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
     cached_waveforms = glob("data/cached/*.mseed")
     for network in inv:
-        # Skip the SYnthetic networks
         if network.code == 'SY':
             continue
         for station in network:
@@ -165,8 +165,9 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
                 starttime = obspy.UTCDateTime(timestamp) + arrivals[0].time - 15
                 endtime = starttime + 60
                 try:
-                    if f"data/cached/{network.code}_{station.code}_{starttime}.mseed" not in cached_waveforms:
-                        print('Downloading waveform')
                         waveform = client.get_waveforms(network=network.code, station=station.code, location="*", channel="*",
                                                     starttime=starttime, endtime=endtime)
                         waveform.write(f"data/cached/{network.code}_{station.code}_{starttime}.mseed", format="MSEED")
@@ -207,12 +208,16 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
     # If there are no waveforms, return an empty plot
     if len(waveforms) == 0:
         fig, ax = plt.subplots()
         ax.text(0.5,0.5,'No waveforms found')
         fig.canvas.draw();
         image = np.array(fig.canvas.renderer.buffer_rgba())
         plt.close(fig)
-        return image
     first_distances = bin_distances(distances, bin_size=10/111.2)
@@ -239,9 +244,12 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
     p_phases = output[:, 0]
     s_phases = output[:, 1]
-    # Max confidence - min variance
-    p_max_confidence = np.min([p_phases[i::len(waveforms)].std() for i in range(len(waveforms))])
-    s_max_confidence = np.min([s_phases[i::len(waveforms)].std() for i in range(len(waveforms))])
     print(f"Starting plotting {len(waveforms)} waveforms")
     fig, ax = plt.subplots(ncols=3, figsize=(10, 3))
@@ -266,17 +274,18 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
     topo_map.da.plot(ax = ax[2], cmap='Greys', add_colorbar=False, add_labels=False)
     ax[1].imshow(hillshade, cmap="Greys", alpha=0.5)
-    output_picks = pd.DataFrame({'station_name' : [], 'starttime' : [],
                                  'p_phase, s' : [], 'p_uncertainty, s' : [],
                                  's_phase, s' : [], 's_uncertainty, s' : [],
                                  'velocity_p, km/s' : [], 'velocity_s, km/s' : []})
     for i in range(len(waveforms)):
         print(f"Plotting waveform {i+1}/{len(waveforms)}")
-        current_P = p_phases[i::len(waveforms)]
-        current_S = s_phases[i::len(waveforms)]
         x = [t0s[i] + pd.Timedelta(seconds=k/100) for k in np.linspace(0,6000,6000)]
         x = mdates.date2num(x)
@@ -284,32 +293,40 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
         p_conf = 1/(current_P.std()/p_max_confidence).item()
         s_conf = 1/(current_S.std()/s_max_confidence).item()
-        ax[0].plot(x, waveforms[i][0, 0]*10+distances[i]*111.2, color='black', alpha=0.5, lw=1)
-        ax[0].scatter(x[int(current_P.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='r', alpha=p_conf, marker='|')
-        ax[0].scatter(x[int(current_S.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='b', alpha=s_conf, marker='|')
-        ax[0].set_ylabel('Z')
-        delta_t = t0s[i].timestamp - obspy.UTCDateTime(timestamp).timestamp
-        velocity_p = (distances[i]*111.2)/(delta_t+current_P.mean()*60).item()
-        velocity_s = (distances[i]*111.2)/(delta_t+current_S.mean()*60).item()
         print(f"Station {st_lats[i]}, {st_lons[i]} has P velocity {velocity_p} and S velocity {velocity_s}")
-        output_picks = output_picks.append(pd.DataFrame({'station_name': [names[i]], 'starttime' : [str(t0s[i])],
                                                         'p_phase, s' : [(delta_t+current_P.mean()*60).item()], 'p_uncertainty, s' : [current_P.std().item()*60],
                                                         's_phase, s' : [(delta_t+current_S.mean()*60).item()], 's_uncertainty, s' : [current_S.std().item()*60],
                                                         'velocity_p, km/s' : [velocity_p], 'velocity_s, km/s' : [velocity_s]}))
-        # Generate an array from st_lat to eq_lat and from st_lon to eq_lon
-        x = np.linspace(st_lons[i], eq_lon, 50)
-        y = np.linspace(st_lats[i], eq_lat, 50)
-        # Plot the array
-        ax[1].scatter(x, y, c=np.zeros_like(x)+velocity_p, alpha=0.1, vmin=0, vmax=8)
-        ax[2].scatter(x, y, c=np.zeros_like(x)+velocity_s, alpha=0.1, vmin=0, vmax=8)
     # Add legend
     ax[0].scatter(None, None, color='r', marker='|', label='P')
     ax[0].scatter(None, None, color='b', marker='|', label='S')
@@ -341,18 +358,11 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
     fig.canvas.draw();
     image = np.array(fig.canvas.renderer.buffer_rgba())
     plt.close(fig)
-    output_picks.to_csv('data/picks.csv', index=False)
-    output_csv = 'data/picks.csv'
     return image, output_picks, output_csv
-def download_picks(output_picks):
-    output_csv = io.BytesIO()
-    output_picks.to_csv(output_csv, index=False)
-    output_csv.seek(0)
-    return output_csv
 model = torch.jit.load("model.pt")
 with gr.Blocks() as demo:
@@ -410,7 +420,7 @@ with gr.Blocks() as demo:
         gr.HTML("""
         <div style="padding: 20px; border-radius: 10px; font-size: 16px;">
         <p style="font-weight: bold; font-size: 24px; margin-bottom: 20px;">Using PhaseHunter to Analyze Seismic Waveforms</p>
-        <p>Select an earthquake from the global earthquake catalogue and the app will download the waveform from the FDSN client of your choice. The app will use a velocity model of your choice to select appropriate time windows for each station within a specified radius of the earthquake.</p>
         <p>The app will then analyze the waveforms and mark the detected phases on the waveform. Pick data for each waveform is reported in seconds from the start of the waveform.</p>
         <p>Velocities are derived from distance and travel time determined by PhaseHunter picks (<span style="font-style: italic;">v = distance/predicted_pick_time</span>). The background of the velocity plot is colored by DEM.</p>
         </div>
@@ -462,7 +472,8 @@ with gr.Blocks() as demo:
             with gr.Column(scale=2):
                 radius_inputs = gr.Slider(minimum=1,
                                         maximum=200,
-                                        value=50, label="Radius (km)",
                                         step=10,
                                         info="""Select the radius around the earthquake to download data from.\n
                                         Note that the larger the radius, the longer the app will take to run.""",
@@ -476,6 +487,23 @@ with gr.Blocks() as demo:
                                 info="Maximum number of waveforms to show per section\n (to avoid long prediction times)",
                                 interactive=True,
                                 )
         button = gr.Button("Predict phases")
         output_image = gr.Image(label='Waveforms with Phases Marked', type='numpy', interactive=False)
@@ -490,7 +518,8 @@ with gr.Blocks() as demo:
                  inputs=[client_inputs, timestamp_inputs,
                          eq_lat_inputs, eq_lon_inputs,
                          radius_inputs, source_depth_inputs,
-                         velocity_inputs, max_waveforms_inputs],
                  outputs=[output_image, output_picks, output_csv])
 demo.launch()

     coeff = 1 - (var / (residuals.max() - residuals.min()))
     return coeff
+def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source_depth_km, velocity_model, max_waveforms, conf_thres_P, conf_thres_S):
     distances, t0s, st_lats, st_lons, waveforms, names = [], [], [], [], [], []
     taup_model = TauPyModel(model=velocity_model)
                             minlongitude=(eq_lon-window), maxlongitude=(eq_lon+window),
                             level='station')
         print('Finished downloading inventory')
     except (IndexError, FDSNNoDataException, FDSNTimeoutException, FDSNInternalServerException):
         fig, ax = plt.subplots()
         ax.text(0.5,0.5,'Something is wrong with the data provider, try another')
     cached_waveforms = glob("data/cached/*.mseed")
     for network in inv:
         if network.code == 'SY':
             continue
         for station in network:
                 starttime = obspy.UTCDateTime(timestamp) + arrivals[0].time - 15
                 endtime = starttime + 60
                 try:
+                    filename=f'{network.code}_{station.code}_{starttime}'
+                    if f"data/cached/{filename}.mseed" not in cached_waveforms:
+                        print(f'Downloading waveform for {filename}')
                         waveform = client.get_waveforms(network=network.code, station=station.code, location="*", channel="*",
                                                     starttime=starttime, endtime=endtime)
                         waveform.write(f"data/cached/{network.code}_{station.code}_{starttime}.mseed", format="MSEED")
     # If there are no waveforms, return an empty plot
     if len(waveforms) == 0:
+        print('No waveforms found')
         fig, ax = plt.subplots()
         ax.text(0.5,0.5,'No waveforms found')
         fig.canvas.draw();
         image = np.array(fig.canvas.renderer.buffer_rgba())
         plt.close(fig)
+        output_picks = pd.DataFrame()
+        output_picks.to_csv('data/picks.csv', index=False)
+        output_csv = 'data/picks.csv'
+        return image, output_picks, output_csv
     first_distances = bin_distances(distances, bin_size=10/111.2)
     p_phases = output[:, 0]
     s_phases = output[:, 1]
+    p_phases = p_phases.reshape(len(waveforms),-1)
+    s_phases = s_phases.reshape(len(waveforms),-1)
+    # Max confidence - min variance
+    p_max_confidence = p_phases.std(axis=-1).min()
+    s_max_confidence = s_phases.std(axis=-1).min()
     print(f"Starting plotting {len(waveforms)} waveforms")
     fig, ax = plt.subplots(ncols=3, figsize=(10, 3))
     topo_map.da.plot(ax = ax[2], cmap='Greys', add_colorbar=False, add_labels=False)
     ax[1].imshow(hillshade, cmap="Greys", alpha=0.5)
+    output_picks = pd.DataFrame({'station_name' : [],
+                                'st_lat' : [], 'st_lon' : [],
+                                 'starttime' : [],
                                  'p_phase, s' : [], 'p_uncertainty, s' : [],
                                  's_phase, s' : [], 's_uncertainty, s' : [],
                                  'velocity_p, km/s' : [], 'velocity_s, km/s' : []})
     for i in range(len(waveforms)):
         print(f"Plotting waveform {i+1}/{len(waveforms)}")
+        current_P = p_phases[i]
+        current_S = s_phases[i]
         x = [t0s[i] + pd.Timedelta(seconds=k/100) for k in np.linspace(0,6000,6000)]
         x = mdates.date2num(x)
         p_conf = 1/(current_P.std()/p_max_confidence).item()
         s_conf = 1/(current_S.std()/s_max_confidence).item()
+        delta_t = t0s[i].timestamp - obspy.UTCDateTime(timestamp).timestamp
+        ax[0].plot(x, waveforms[i][0, 0]*10+distances[i]*111.2, color='black', alpha=0.5, lw=1)
+        if (current_P.std().item()*60 < conf_thres_P) or (current_S.std().item()*60 < conf_thres_S):
+            ax[0].scatter(x[int(current_P.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='r', alpha=p_conf, marker='|')
+            ax[0].scatter(x[int(current_S.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='b', alpha=s_conf, marker='|')
+            velocity_p = (distances[i]*111.2)/(delta_t+current_P.mean()*60).item()
+            velocity_s = (distances[i]*111.2)/(delta_t+current_S.mean()*60).item()
+            # Generate an array from st_lat to eq_lat and from st_lon to eq_lon
+            x = np.linspace(st_lons[i], eq_lon, 50)
+            y = np.linspace(st_lats[i], eq_lat, 50)
+            # Plot the array
+            ax[1].scatter(x, y, c=np.zeros_like(x)+velocity_p, alpha=0.1, vmin=0, vmax=8)
+            ax[2].scatter(x, y, c=np.zeros_like(x)+velocity_s, alpha=0.1, vmin=0, vmax=8)
+        else:
+            velocity_p = np.nan
+            velocity_s = np.nan
+        ax[0].set_ylabel('Z')
         print(f"Station {st_lats[i]}, {st_lons[i]} has P velocity {velocity_p} and S velocity {velocity_s}")
+        output_picks = output_picks.append(pd.DataFrame({'station_name': [names[i]],
+                                                        'st_lat' : [st_lats[i]], 'st_lon' : [st_lons[i]],
+                                                        'starttime' : [str(t0s[i])],
                                                         'p_phase, s' : [(delta_t+current_P.mean()*60).item()], 'p_uncertainty, s' : [current_P.std().item()*60],
                                                         's_phase, s' : [(delta_t+current_S.mean()*60).item()], 's_uncertainty, s' : [current_S.std().item()*60],
                                                         'velocity_p, km/s' : [velocity_p], 'velocity_s, km/s' : [velocity_s]}))
     # Add legend
     ax[0].scatter(None, None, color='r', marker='|', label='P')
     ax[0].scatter(None, None, color='b', marker='|', label='S')
     fig.canvas.draw();
     image = np.array(fig.canvas.renderer.buffer_rgba())
     plt.close(fig)
+    output_picks.to_csv(f'data/velocity/{eq_lat}_{eq_lon}_{timestamp}_{len(waveforms)}.csv', index=False)
+    output_csv = f'data/velocity/{eq_lat}_{eq_lon}_{timestamp}_{len(waveforms)}.csv'
     return image, output_picks, output_csv
 model = torch.jit.load("model.pt")
 with gr.Blocks() as demo:
         gr.HTML("""
         <div style="padding: 20px; border-radius: 10px; font-size: 16px;">
         <p style="font-weight: bold; font-size: 24px; margin-bottom: 20px;">Using PhaseHunter to Analyze Seismic Waveforms</p>
+        <p>Select an earthquake from the global earthquake catalogue (e.g. <a href="https://earthquake.usgs.gov/earthquakes/map">USGS</a>) and the app will download the waveform from the FDSN client of your choice. The app will use a velocity model of your choice to select appropriate time windows for each station within a specified radius of the earthquake.</p>
         <p>The app will then analyze the waveforms and mark the detected phases on the waveform. Pick data for each waveform is reported in seconds from the start of the waveform.</p>
         <p>Velocities are derived from distance and travel time determined by PhaseHunter picks (<span style="font-style: italic;">v = distance/predicted_pick_time</span>). The background of the velocity plot is colored by DEM.</p>
         </div>
             with gr.Column(scale=2):
                 radius_inputs = gr.Slider(minimum=1,
                                         maximum=200,
+                                        value=50,
+                                        label="Radius (km)",
                                         step=10,
                                         info="""Select the radius around the earthquake to download data from.\n
                                         Note that the larger the radius, the longer the app will take to run.""",
                                 info="Maximum number of waveforms to show per section\n (to avoid long prediction times)",
                                 interactive=True,
                                 )
+            with gr.Column(scale=2):
+                P_thres_inputs = gr.Slider(minimum=0.01,
+                                maximum=1,
+                                value=0.1,
+                                label="P uncertainty threshold, s",
+                                step=0.01,
+                                info="Acceptable uncertainty for P picks expressed in std() seconds",
+                                interactive=True,
+                                )
+                S_thres_inputs = gr.Slider(minimum=0.01,
+                                maximum=1,
+                                value=0.2,
+                                label="S uncertainty threshold, s",
+                                step=0.01,
+                                info="Acceptable uncertainty for S picks expressed in std() seconds",
+                                interactive=True,
+                                )
         button = gr.Button("Predict phases")
         output_image = gr.Image(label='Waveforms with Phases Marked', type='numpy', interactive=False)
                  inputs=[client_inputs, timestamp_inputs,
                          eq_lat_inputs, eq_lon_inputs,
                          radius_inputs, source_depth_inputs,
+                         velocity_inputs, max_waveforms_inputs,
+                         P_thres_inputs, S_thres_inputs],
                  outputs=[output_image, output_picks, output_csv])
 demo.launch()