Zhu Lin committed on
Commit d0a4a1f
1 Parent(s): 92a22b6
Files changed (6)
  1. .DS_Store +0 -0
  2. README.md +6 -5
  3. app.py +13 -0
  4. config.json +33 -0
  5. configuration_bert.py +26 -0
  6. requirements.txt +11 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
README.md CHANGED
@@ -1,12 +1,13 @@
 ---
-title: Dnabert2 Demo
-emoji: 🦀
-colorFrom: blue
-colorTo: gray
+title: DNABERT-2 demo
+emoji: 🐨
+colorFrom: yellow
+colorTo: green
 sdk: gradio
 sdk_version: 4.38.1
 app_file: app.py
 pinned: false
+license: bigscience-openrail-m
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,13 @@
+
+from transformers import AutoTokenizer, AutoModel, pipeline
+from transformers.models.bert.configuration_bert import BertConfig
+import gradio as gr
+
+config = BertConfig.from_pretrained("czl/dnabert2")
+tokenizer = AutoTokenizer.from_pretrained("czl/dnabert2", trust_remote_code=True)
+model = AutoModel.from_pretrained("czl/dnabert2", trust_remote_code=True, config=config)
+
+pipe = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
+
+demo = gr.Interface.from_pipeline(pipe)
+demo.launch()
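
The app wraps the model and tokenizer in a feature-extraction pipeline, so the Space returns per-token embeddings for whatever string is submitted. A minimal usage sketch, assuming the objects created in app.py above load successfully (the DNA sequence is an arbitrary placeholder, not part of this commit):

# Query the feature-extraction pipeline with a DNA string.
# Output nesting is roughly [batch][token][hidden_dim] in recent transformers versions.
sequence = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATC"  # made-up example
features = pipe(sequence)
print(len(features[0]))     # number of tokens produced for the sequence
print(len(features[0][0]))  # embedding width, 768 per config.json
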
config.json ADDED
@@ -0,0 +1,33 @@
+{
+  "_name_or_path": "czl/dnabert2",
+  "alibi_starting_size": 512,
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "auto_map": {
+    "AutoConfig": "configuration_bert.BertConfig",
+    "AutoModel": "bert_layers.BertModel",
+    "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM",
+    "AutoModelForSequenceClassification": "bert_layers.BertForSequenceClassification"
+  },
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 4096,
+  "flash_attn": false
+}
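
The config keeps the standard BERT dimensions (12 layers, 12 heads, hidden size 768) but uses a 4096-entry vocabulary and maps the Auto classes to custom modules via auto_map, which is why app.py passes trust_remote_code=True. A short sketch of loading and inspecting it, assuming the czl/dnabert2 repo stays reachable:

from transformers import AutoConfig

# auto_map resolves AutoConfig to configuration_bert.BertConfig when
# trust_remote_code=True, mirroring what app.py does for the model.
cfg = AutoConfig.from_pretrained("czl/dnabert2", trust_remote_code=True)
print(cfg.model_type)            # "bert"
print(cfg.vocab_size)            # 4096
print(cfg.hidden_size)           # 768
print(cfg.alibi_starting_size)   # 512, consumed by the custom config below
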
configuration_bert.py ADDED
@@ -0,0 +1,26 @@
+# Copyright 2022 MosaicML Examples authors
+# SPDX-License-Identifier: Apache-2.0
+
+from transformers import BertConfig as TransformersBertConfig
+
+
+class BertConfig(TransformersBertConfig):
+
+    def __init__(
+        self,
+        alibi_starting_size: int = 512,
+        attention_probs_dropout_prob: float = 0.0,
+        **kwargs,
+    ):
+        """Configuration class for MosaicBert.
+
+        Args:
+            alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
+                create when initializing the model. You should be able to ignore this parameter in most cases.
+                Defaults to 512.
+            attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
+                (otherwise, Flash Attention will be off by default). Defaults to 0.0.
+        """
+        super().__init__(
+            attention_probs_dropout_prob=attention_probs_dropout_prob, **kwargs)
+        self.alibi_starting_size = alibi_starting_size
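
The subclass changes little: it defaults attention dropout to 0.0 (so Flash Attention can stay enabled) and records alibi_starting_size for sizing the ALiBi bias tensor; everything else is inherited from the stock transformers BertConfig. A quick sketch with the module importable locally (the argument values are illustrative, not from the repo):

# Instantiate the custom config directly; arguments here are example values.
from configuration_bert import BertConfig

cfg = BertConfig(alibi_starting_size=1024, vocab_size=4096)
print(cfg.alibi_starting_size)           # 1024
print(cfg.attention_probs_dropout_prob)  # 0.0, the overridden default
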
requirements.txt ADDED
@@ -0,0 +1,11 @@
+transformers[torch]
+torch
+torchvision
+torchaudio
+einops
+peft
+omegaconf
+evaluate
+accelerate
+gradio
+spaces