crimeacs commited on
Commit
6767598
1 Parent(s): 66ff4f5

fixed a bug with uncertainties

Browse files
Files changed (1) hide show
  1. app.py +70 -41
app.py CHANGED
@@ -114,7 +114,7 @@ def variance_coefficient(residuals):
114
  coeff = 1 - (var / (residuals.max() - residuals.min()))
115
  return coeff
116
 
117
- def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source_depth_km, velocity_model, max_waveforms):
118
  distances, t0s, st_lats, st_lons, waveforms, names = [], [], [], [], [], []
119
 
120
  taup_model = TauPyModel(model=velocity_model)
@@ -137,6 +137,7 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
137
  minlongitude=(eq_lon-window), maxlongitude=(eq_lon+window),
138
  level='station')
139
  print('Finished downloading inventory')
 
140
  except (IndexError, FDSNNoDataException, FDSNTimeoutException, FDSNInternalServerException):
141
  fig, ax = plt.subplots()
142
  ax.text(0.5,0.5,'Something is wrong with the data provider, try another')
@@ -149,7 +150,6 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
149
  cached_waveforms = glob("data/cached/*.mseed")
150
 
151
  for network in inv:
152
- # Skip the SYntetic networks
153
  if network.code == 'SY':
154
  continue
155
  for station in network:
@@ -165,8 +165,9 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
165
  starttime = obspy.UTCDateTime(timestamp) + arrivals[0].time - 15
166
  endtime = starttime + 60
167
  try:
168
- if f"data/cached/{network.code}_{station.code}_{starttime}.mseed" not in cached_waveforms:
169
- print('Downloading waveform')
 
170
  waveform = client.get_waveforms(network=network.code, station=station.code, location="*", channel="*",
171
  starttime=starttime, endtime=endtime)
172
  waveform.write(f"data/cached/{network.code}_{station.code}_{starttime}.mseed", format="MSEED")
@@ -207,12 +208,16 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
207
 
208
  # If there are no waveforms, return an empty plot
209
  if len(waveforms) == 0:
 
210
  fig, ax = plt.subplots()
211
  ax.text(0.5,0.5,'No waveforms found')
212
  fig.canvas.draw();
213
  image = np.array(fig.canvas.renderer.buffer_rgba())
214
  plt.close(fig)
215
- return image
 
 
 
216
 
217
 
218
  first_distances = bin_distances(distances, bin_size=10/111.2)
@@ -239,9 +244,12 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
239
  p_phases = output[:, 0]
240
  s_phases = output[:, 1]
241
 
242
- # Max confidence - min variance
243
- p_max_confidence = np.min([p_phases[i::len(waveforms)].std() for i in range(len(waveforms))])
244
- s_max_confidence = np.min([s_phases[i::len(waveforms)].std() for i in range(len(waveforms))])
 
 
 
245
 
246
  print(f"Starting plotting {len(waveforms)} waveforms")
247
  fig, ax = plt.subplots(ncols=3, figsize=(10, 3))
@@ -266,17 +274,18 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
266
  topo_map.da.plot(ax = ax[2], cmap='Greys', add_colorbar=False, add_labels=False)
267
  ax[1].imshow(hillshade, cmap="Greys", alpha=0.5)
268
 
269
- output_picks = pd.DataFrame({'station_name' : [], 'starttime' : [],
 
 
270
  'p_phase, s' : [], 'p_uncertainty, s' : [],
271
  's_phase, s' : [], 's_uncertainty, s' : [],
272
  'velocity_p, km/s' : [], 'velocity_s, km/s' : []})
273
-
274
-
275
  for i in range(len(waveforms)):
276
  print(f"Plotting waveform {i+1}/{len(waveforms)}")
277
- current_P = p_phases[i::len(waveforms)]
278
- current_S = s_phases[i::len(waveforms)]
279
-
280
  x = [t0s[i] + pd.Timedelta(seconds=k/100) for k in np.linspace(0,6000,6000)]
281
  x = mdates.date2num(x)
282
 
@@ -284,32 +293,40 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
284
  p_conf = 1/(current_P.std()/p_max_confidence).item()
285
  s_conf = 1/(current_S.std()/s_max_confidence).item()
286
 
287
- ax[0].plot(x, waveforms[i][0, 0]*10+distances[i]*111.2, color='black', alpha=0.5, lw=1)
288
 
289
- ax[0].scatter(x[int(current_P.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='r', alpha=p_conf, marker='|')
290
- ax[0].scatter(x[int(current_S.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='b', alpha=s_conf, marker='|')
291
- ax[0].set_ylabel('Z')
292
 
293
- delta_t = t0s[i].timestamp - obspy.UTCDateTime(timestamp).timestamp
 
 
 
 
 
294
 
295
- velocity_p = (distances[i]*111.2)/(delta_t+current_P.mean()*60).item()
296
- velocity_s = (distances[i]*111.2)/(delta_t+current_S.mean()*60).item()
 
 
 
 
 
297
 
 
 
 
 
 
298
  print(f"Station {st_lats[i]}, {st_lons[i]} has P velocity {velocity_p} and S velocity {velocity_s}")
299
 
300
- output_picks = output_picks.append(pd.DataFrame({'station_name': [names[i]], 'starttime' : [str(t0s[i])],
 
 
301
  'p_phase, s' : [(delta_t+current_P.mean()*60).item()], 'p_uncertainty, s' : [current_P.std().item()*60],
302
  's_phase, s' : [(delta_t+current_S.mean()*60).item()], 's_uncertainty, s' : [current_S.std().item()*60],
303
  'velocity_p, km/s' : [velocity_p], 'velocity_s, km/s' : [velocity_s]}))
304
 
305
- # Generate an array from st_lat to eq_lat and from st_lon to eq_lon
306
- x = np.linspace(st_lons[i], eq_lon, 50)
307
- y = np.linspace(st_lats[i], eq_lat, 50)
308
 
309
- # Plot the array
310
- ax[1].scatter(x, y, c=np.zeros_like(x)+velocity_p, alpha=0.1, vmin=0, vmax=8)
311
- ax[2].scatter(x, y, c=np.zeros_like(x)+velocity_s, alpha=0.1, vmin=0, vmax=8)
312
-
313
  # Add legend
314
  ax[0].scatter(None, None, color='r', marker='|', label='P')
315
  ax[0].scatter(None, None, color='b', marker='|', label='S')
@@ -341,18 +358,11 @@ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source
341
  fig.canvas.draw();
342
  image = np.array(fig.canvas.renderer.buffer_rgba())
343
  plt.close(fig)
344
-
345
- output_picks.to_csv('data/picks.csv', index=False)
346
- output_csv = 'data/picks.csv'
347
 
348
  return image, output_picks, output_csv
349
 
350
- def download_picks(output_picks):
351
- output_csv = io.BytesIO()
352
- output_picks.to_csv(output_csv, index=False)
353
- output_csv.seek(0)
354
- return output_csv
355
-
356
  model = torch.jit.load("model.pt")
357
 
358
  with gr.Blocks() as demo:
@@ -410,7 +420,7 @@ with gr.Blocks() as demo:
410
  gr.HTML("""
411
  <div style="padding: 20px; border-radius: 10px; font-size: 16px;">
412
  <p style="font-weight: bold; font-size: 24px; margin-bottom: 20px;">Using PhaseHunter to Analyze Seismic Waveforms</p>
413
- <p>Select an earthquake from the global earthquake catalogue and the app will download the waveform from the FDSN client of your choice. The app will use a velocity model of your choice to select appropriate time windows for each station within a specified radius of the earthquake.</p>
414
  <p>The app will then analyze the waveforms and mark the detected phases on the waveform. Pick data for each waveform is reported in seconds from the start of the waveform.</p>
415
  <p>Velocities are derived from distance and travel time determined by PhaseHunter picks (<span style="font-style: italic;">v = distance/predicted_pick_time</span>). The background of the velocity plot is colored by DEM.</p>
416
  </div>
@@ -462,7 +472,8 @@ with gr.Blocks() as demo:
462
  with gr.Column(scale=2):
463
  radius_inputs = gr.Slider(minimum=1,
464
  maximum=200,
465
- value=50, label="Radius (km)",
 
466
  step=10,
467
  info="""Select the radius around the earthquake to download data from.\n
468
  Note that the larger the radius, the longer the app will take to run.""",
@@ -476,6 +487,23 @@ with gr.Blocks() as demo:
476
  info="Maximum number of waveforms to show per section\n (to avoid long prediction times)",
477
  interactive=True,
478
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
  button = gr.Button("Predict phases")
481
  output_image = gr.Image(label='Waveforms with Phases Marked', type='numpy', interactive=False)
@@ -490,7 +518,8 @@ with gr.Blocks() as demo:
490
  inputs=[client_inputs, timestamp_inputs,
491
  eq_lat_inputs, eq_lon_inputs,
492
  radius_inputs, source_depth_inputs,
493
- velocity_inputs, max_waveforms_inputs],
 
494
  outputs=[output_image, output_picks, output_csv])
495
 
496
  demo.launch()
 
114
  coeff = 1 - (var / (residuals.max() - residuals.min()))
115
  return coeff
116
 
117
+ def predict_on_section(client_name, timestamp, eq_lat, eq_lon, radius_km, source_depth_km, velocity_model, max_waveforms, conf_thres_P, conf_thres_S):
118
  distances, t0s, st_lats, st_lons, waveforms, names = [], [], [], [], [], []
119
 
120
  taup_model = TauPyModel(model=velocity_model)
 
137
  minlongitude=(eq_lon-window), maxlongitude=(eq_lon+window),
138
  level='station')
139
  print('Finished downloading inventory')
140
+
141
  except (IndexError, FDSNNoDataException, FDSNTimeoutException, FDSNInternalServerException):
142
  fig, ax = plt.subplots()
143
  ax.text(0.5,0.5,'Something is wrong with the data provider, try another')
 
150
  cached_waveforms = glob("data/cached/*.mseed")
151
 
152
  for network in inv:
 
153
  if network.code == 'SY':
154
  continue
155
  for station in network:
 
165
  starttime = obspy.UTCDateTime(timestamp) + arrivals[0].time - 15
166
  endtime = starttime + 60
167
  try:
168
+ filename=f'{network.code}_{station.code}_{starttime}'
169
+ if f"data/cached/{filename}.mseed" not in cached_waveforms:
170
+ print(f'Downloading waveform for {filename}')
171
  waveform = client.get_waveforms(network=network.code, station=station.code, location="*", channel="*",
172
  starttime=starttime, endtime=endtime)
173
  waveform.write(f"data/cached/{network.code}_{station.code}_{starttime}.mseed", format="MSEED")
 
208
 
209
  # If there are no waveforms, return an empty plot
210
  if len(waveforms) == 0:
211
+ print('No waveforms found')
212
  fig, ax = plt.subplots()
213
  ax.text(0.5,0.5,'No waveforms found')
214
  fig.canvas.draw();
215
  image = np.array(fig.canvas.renderer.buffer_rgba())
216
  plt.close(fig)
217
+ output_picks = pd.DataFrame()
218
+ output_picks.to_csv('data/picks.csv', index=False)
219
+ output_csv = 'data/picks.csv'
220
+ return image, output_picks, output_csv
221
 
222
 
223
  first_distances = bin_distances(distances, bin_size=10/111.2)
 
244
  p_phases = output[:, 0]
245
  s_phases = output[:, 1]
246
 
247
+ p_phases = p_phases.reshape(len(waveforms),-1)
248
+ s_phases = s_phases.reshape(len(waveforms),-1)
249
+
250
+ # Max confidence - min variance
251
+ p_max_confidence = p_phases.std(axis=-1).min()
252
+ s_max_confidence = s_phases.std(axis=-1).min()
253
 
254
  print(f"Starting plotting {len(waveforms)} waveforms")
255
  fig, ax = plt.subplots(ncols=3, figsize=(10, 3))
 
274
  topo_map.da.plot(ax = ax[2], cmap='Greys', add_colorbar=False, add_labels=False)
275
  ax[1].imshow(hillshade, cmap="Greys", alpha=0.5)
276
 
277
+ output_picks = pd.DataFrame({'station_name' : [],
278
+ 'st_lat' : [], 'st_lon' : [],
279
+ 'starttime' : [],
280
  'p_phase, s' : [], 'p_uncertainty, s' : [],
281
  's_phase, s' : [], 's_uncertainty, s' : [],
282
  'velocity_p, km/s' : [], 'velocity_s, km/s' : []})
283
+
 
284
  for i in range(len(waveforms)):
285
  print(f"Plotting waveform {i+1}/{len(waveforms)}")
286
+ current_P = p_phases[i]
287
+ current_S = s_phases[i]
288
+
289
  x = [t0s[i] + pd.Timedelta(seconds=k/100) for k in np.linspace(0,6000,6000)]
290
  x = mdates.date2num(x)
291
 
 
293
  p_conf = 1/(current_P.std()/p_max_confidence).item()
294
  s_conf = 1/(current_S.std()/s_max_confidence).item()
295
 
296
+ delta_t = t0s[i].timestamp - obspy.UTCDateTime(timestamp).timestamp
297
 
298
+ ax[0].plot(x, waveforms[i][0, 0]*10+distances[i]*111.2, color='black', alpha=0.5, lw=1)
 
 
299
 
300
+ if (current_P.std().item()*60 < conf_thres_P) or (current_S.std().item()*60 < conf_thres_S):
301
+ ax[0].scatter(x[int(current_P.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='r', alpha=p_conf, marker='|')
302
+ ax[0].scatter(x[int(current_S.mean()*waveforms[i][0].shape[-1])], waveforms[i][0, 0].mean()+distances[i]*111.2, color='b', alpha=s_conf, marker='|')
303
+
304
+ velocity_p = (distances[i]*111.2)/(delta_t+current_P.mean()*60).item()
305
+ velocity_s = (distances[i]*111.2)/(delta_t+current_S.mean()*60).item()
306
 
307
+ # Generate an array from st_lat to eq_lat and from st_lon to eq_lon
308
+ x = np.linspace(st_lons[i], eq_lon, 50)
309
+ y = np.linspace(st_lats[i], eq_lat, 50)
310
+
311
+ # Plot the array
312
+ ax[1].scatter(x, y, c=np.zeros_like(x)+velocity_p, alpha=0.1, vmin=0, vmax=8)
313
+ ax[2].scatter(x, y, c=np.zeros_like(x)+velocity_s, alpha=0.1, vmin=0, vmax=8)
314
 
315
+ else:
316
+ velocity_p = np.nan
317
+ velocity_s = np.nan
318
+
319
+ ax[0].set_ylabel('Z')
320
  print(f"Station {st_lats[i]}, {st_lons[i]} has P velocity {velocity_p} and S velocity {velocity_s}")
321
 
322
+ output_picks = output_picks.append(pd.DataFrame({'station_name': [names[i]],
323
+ 'st_lat' : [st_lats[i]], 'st_lon' : [st_lons[i]],
324
+ 'starttime' : [str(t0s[i])],
325
  'p_phase, s' : [(delta_t+current_P.mean()*60).item()], 'p_uncertainty, s' : [current_P.std().item()*60],
326
  's_phase, s' : [(delta_t+current_S.mean()*60).item()], 's_uncertainty, s' : [current_S.std().item()*60],
327
  'velocity_p, km/s' : [velocity_p], 'velocity_s, km/s' : [velocity_s]}))
328
 
 
 
 
329
 
 
 
 
 
330
  # Add legend
331
  ax[0].scatter(None, None, color='r', marker='|', label='P')
332
  ax[0].scatter(None, None, color='b', marker='|', label='S')
 
358
  fig.canvas.draw();
359
  image = np.array(fig.canvas.renderer.buffer_rgba())
360
  plt.close(fig)
361
+ output_picks.to_csv(f'data/velocity/{eq_lat}_{eq_lon}_{timestamp}_{len(waveforms)}.csv', index=False)
362
+ output_csv = f'data/velocity/{eq_lat}_{eq_lon}_{timestamp}_{len(waveforms)}.csv'
 
363
 
364
  return image, output_picks, output_csv
365
 
 
 
 
 
 
 
366
  model = torch.jit.load("model.pt")
367
 
368
  with gr.Blocks() as demo:
 
420
  gr.HTML("""
421
  <div style="padding: 20px; border-radius: 10px; font-size: 16px;">
422
  <p style="font-weight: bold; font-size: 24px; margin-bottom: 20px;">Using PhaseHunter to Analyze Seismic Waveforms</p>
423
+ <p>Select an earthquake from the global earthquake catalogue (e.g. <a href="https://earthquake.usgs.gov/earthquakes/map">USGS</a>) and the app will download the waveform from the FDSN client of your choice. The app will use a velocity model of your choice to select appropriate time windows for each station within a specified radius of the earthquake.</p>
424
  <p>The app will then analyze the waveforms and mark the detected phases on the waveform. Pick data for each waveform is reported in seconds from the start of the waveform.</p>
425
  <p>Velocities are derived from distance and travel time determined by PhaseHunter picks (<span style="font-style: italic;">v = distance/predicted_pick_time</span>). The background of the velocity plot is colored by DEM.</p>
426
  </div>
 
472
  with gr.Column(scale=2):
473
  radius_inputs = gr.Slider(minimum=1,
474
  maximum=200,
475
+ value=50,
476
+ label="Radius (km)",
477
  step=10,
478
  info="""Select the radius around the earthquake to download data from.\n
479
  Note that the larger the radius, the longer the app will take to run.""",
 
487
  info="Maximum number of waveforms to show per section\n (to avoid long prediction times)",
488
  interactive=True,
489
  )
490
+ with gr.Column(scale=2):
491
+ P_thres_inputs = gr.Slider(minimum=0.01,
492
+ maximum=1,
493
+ value=0.1,
494
+ label="P uncertainty threshold, s",
495
+ step=0.01,
496
+ info="Acceptable uncertainty for P picks expressed in std() seconds",
497
+ interactive=True,
498
+ )
499
+ S_thres_inputs = gr.Slider(minimum=0.01,
500
+ maximum=1,
501
+ value=0.2,
502
+ label="S uncertainty threshold, s",
503
+ step=0.01,
504
+ info="Acceptable uncertainty for S picks expressed in std() seconds",
505
+ interactive=True,
506
+ )
507
 
508
  button = gr.Button("Predict phases")
509
  output_image = gr.Image(label='Waveforms with Phases Marked', type='numpy', interactive=False)
 
518
  inputs=[client_inputs, timestamp_inputs,
519
  eq_lat_inputs, eq_lon_inputs,
520
  radius_inputs, source_depth_inputs,
521
+ velocity_inputs, max_waveforms_inputs,
522
+ P_thres_inputs, S_thres_inputs],
523
  outputs=[output_image, output_picks, output_csv])
524
 
525
  demo.launch()