EC2 Default User
committed on
Commit
•
9f6f345
1
Parent(s):
5960f45
initial upload
Browse files- README.md +79 -0
- config.json +74 -0
- generation_config.json +13 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +16 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
CHANGED
@@ -1,3 +1,82 @@
|
|
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
---
|
3 |
+
language: en
|
4 |
+
tags:
|
5 |
+
- sagemaker
|
6 |
+
- bart
|
7 |
+
- summarization
|
8 |
license: apache-2.0
|
9 |
+
datasets:
|
10 |
+
- samsum
|
11 |
+
model-index:
|
12 |
+
- name: bart-large-tomasg25/scientific_lay_summarisation
|
13 |
+
results:
|
14 |
+
- task:
|
15 |
+
name: Abstractive Text Summarization
|
16 |
+
type: abstractive-text-summarization
|
17 |
+
dataset:
|
18 |
+
name: "tomasg25/scientific_lay_summarisation"
|
19 |
+
type: plos
|
20 |
+
metrics:
|
21 |
+
- name: Validation ROUGE-1
|
22 |
+
type: rouge-1
|
23 |
+
value: 42.621
|
24 |
+
- name: Validation ROUGE-2
|
25 |
+
type: rouge-2
|
26 |
+
value: 21.9825
|
27 |
+
- name: Validation ROUGE-L
|
28 |
+
type: rouge-l
|
29 |
+
value: 33.034
|
30 |
+
- name: Test ROUGE-1
|
31 |
+
type: rouge-1
|
32 |
+
value: 41.3174
|
33 |
+
- name: Test ROUGE-2
|
34 |
+
type: rouge-2
|
35 |
+
value: 20.8716
|
36 |
+
- name: Test ROUGE-L
|
37 |
+
type: rouge-l
|
38 |
+
value: 32.1337
|
39 |
+
widget:
|
40 |
---
|
41 |
+
## `bart-large-tomasg25/scientific_lay_summarisation`
|
42 |
+
This model was trained using Amazon SageMaker and the new Hugging Face Deep Learning container.
|
43 |
+
For more information look at:
|
44 |
+
- [🤗 Transformers Documentation: Amazon SageMaker](https://huggingface.co/transformers/sagemaker.html)
|
45 |
+
- [Example Notebooks](https://github.com/huggingface/notebooks/tree/master/sagemaker)
|
46 |
+
- [Amazon SageMaker documentation for Hugging Face](https://docs.aws.amazon.com/sagemaker/latest/dg/hugging-face.html)
|
47 |
+
- [Python SDK SageMaker documentation for Hugging Face](https://sagemaker.readthedocs.io/en/stable/frameworks/huggingface/index.html)
|
48 |
+
- [Deep Learning Container](https://github.com/aws/deep-learning-containers/blob/master/available_images.md#huggingface-training-containers)
|
49 |
+
## Hyperparameters
|
50 |
+
{
|
51 |
+
"cache_dir": "opt/ml/input",
|
52 |
+
"dataset_config_name": "plos",
|
53 |
+
"dataset_name": "tomasg25/scientific_lay_summarisation",
|
54 |
+
"do_eval": true,
|
55 |
+
"do_predict": true,
|
56 |
+
"do_train": true,
|
57 |
+
"fp16": true,
|
58 |
+
"learning_rate": 5e-05,
|
59 |
+
"model_name_or_path": "facebook/bart-large",
|
60 |
+
"num_train_epochs": 1,
|
61 |
+
"output_dir": "/opt/ml/model",
|
62 |
+
"per_device_eval_batch_size": 4,
|
63 |
+
"per_device_train_batch_size": 4,
|
64 |
+
"predict_with_generate": true,
|
65 |
+
"seed": 7
|
66 |
+
}
|
67 |
+
## Usage
|
68 |
+
from transformers import pipeline
|
69 |
+
summarizer = pipeline("summarization", model="sambydlo/bart-large-tomasg25/scientific_lay_summarisation")
|
70 |
+
article = "Food production is a major driver of greenhouse gas (GHG) emissions, water and land use, and dietary risk factors are contributors to non-communicable diseases. Shifts in dietary patterns can therefore potentially provide benefits for both the environment and health. However, there is uncertainty about the magnitude of these impacts, and the dietary changes necessary to achieve them. We systematically review the evidence on changes in GHG emissions, land use, and water use, from shifting current dietary intakes to environ- mentally sustainable dietary patterns. We find 14 common sustainable dietary patterns across reviewed studies, with reductions as high as 70–80% of GHG emissions and land use, and 50% of water use (with medians of about 20–30% for these indicators across all studies) possible by adopting sustainable dietary patterns. Reductions in environmental footprints were generally proportional to the magnitude of animal-based food restriction. Dietary shifts also yielded modest benefits in all-cause mortality risk. Our review reveals that environmental and health benefits are possible by shifting current Western diets to a variety of more sustainable dietary patterns."
|
71 |
+
summarizer(article)
|
72 |
+
## Results
|
73 |
+
| key | value |
|
74 |
+
| --- | ----- |
|
75 |
+
| eval_rouge1 | 41.3889 |
|
76 |
+
| eval_rouge2 | 13.3641 |
|
77 |
+
| eval_rougeL | 24.3154 |
|
78 |
+
| eval_rougeLsum | 36.612 |
|
79 |
+
| test_rouge1 | 41.4786 |
|
80 |
+
| test_rouge2 | 13.3787 |
|
81 |
+
| test_rougeL | 24.1558 |
|
82 |
+
| test_rougeLsum | 36.7723 |
|
config.json
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "facebook/bart-large",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"activation_function": "gelu",
|
5 |
+
"add_bias_logits": false,
|
6 |
+
"add_final_layer_norm": false,
|
7 |
+
"architectures": [
|
8 |
+
"BartForConditionalGeneration"
|
9 |
+
],
|
10 |
+
"attention_dropout": 0.1,
|
11 |
+
"bos_token_id": 0,
|
12 |
+
"classif_dropout": 0.1,
|
13 |
+
"classifier_dropout": 0.0,
|
14 |
+
"d_model": 1024,
|
15 |
+
"decoder_attention_heads": 16,
|
16 |
+
"decoder_ffn_dim": 4096,
|
17 |
+
"decoder_layerdrop": 0.0,
|
18 |
+
"decoder_layers": 12,
|
19 |
+
"decoder_start_token_id": 2,
|
20 |
+
"dropout": 0.1,
|
21 |
+
"early_stopping": true,
|
22 |
+
"encoder_attention_heads": 16,
|
23 |
+
"encoder_ffn_dim": 4096,
|
24 |
+
"encoder_layerdrop": 0.0,
|
25 |
+
"encoder_layers": 12,
|
26 |
+
"eos_token_id": 2,
|
27 |
+
"forced_bos_token_id": 0,
|
28 |
+
"forced_eos_token_id": 2,
|
29 |
+
"gradient_checkpointing": false,
|
30 |
+
"id2label": {
|
31 |
+
"0": "LABEL_0",
|
32 |
+
"1": "LABEL_1",
|
33 |
+
"2": "LABEL_2"
|
34 |
+
},
|
35 |
+
"init_std": 0.02,
|
36 |
+
"is_encoder_decoder": true,
|
37 |
+
"label2id": {
|
38 |
+
"LABEL_0": 0,
|
39 |
+
"LABEL_1": 1,
|
40 |
+
"LABEL_2": 2
|
41 |
+
},
|
42 |
+
"max_position_embeddings": 1024,
|
43 |
+
"model_type": "bart",
|
44 |
+
"no_repeat_ngram_size": 3,
|
45 |
+
"normalize_before": false,
|
46 |
+
"num_beams": 4,
|
47 |
+
"num_hidden_layers": 12,
|
48 |
+
"pad_token_id": 1,
|
49 |
+
"scale_embedding": false,
|
50 |
+
"task_specific_params": {
|
51 |
+
"summarization": {
|
52 |
+
"length_penalty": 1.0,
|
53 |
+
"max_length": 128,
|
54 |
+
"min_length": 12,
|
55 |
+
"num_beams": 4
|
56 |
+
},
|
57 |
+
"summarization_cnn": {
|
58 |
+
"length_penalty": 2.0,
|
59 |
+
"max_length": 142,
|
60 |
+
"min_length": 56,
|
61 |
+
"num_beams": 4
|
62 |
+
},
|
63 |
+
"summarization_xsum": {
|
64 |
+
"length_penalty": 1.0,
|
65 |
+
"max_length": 62,
|
66 |
+
"min_length": 11,
|
67 |
+
"num_beams": 6
|
68 |
+
}
|
69 |
+
},
|
70 |
+
"torch_dtype": "float32",
|
71 |
+
"transformers_version": "4.26.0",
|
72 |
+
"use_cache": true,
|
73 |
+
"vocab_size": 50265
|
74 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 0,
|
4 |
+
"decoder_start_token_id": 2,
|
5 |
+
"early_stopping": true,
|
6 |
+
"eos_token_id": 2,
|
7 |
+
"forced_bos_token_id": 0,
|
8 |
+
"forced_eos_token_id": 2,
|
9 |
+
"no_repeat_ngram_size": 3,
|
10 |
+
"num_beams": 4,
|
11 |
+
"pad_token_id": 1,
|
12 |
+
"transformers_version": "4.26.0"
|
13 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1564333254dd2bfe0d7d3e61f4ddaee6007cb9df0fce3c9aa5f18fe493979671
|
3 |
+
size 1625541389
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<s>",
|
4 |
+
"cls_token": "<s>",
|
5 |
+
"eos_token": "</s>",
|
6 |
+
"errors": "replace",
|
7 |
+
"mask_token": "<mask>",
|
8 |
+
"model_max_length": 1024,
|
9 |
+
"name_or_path": "facebook/bart-large",
|
10 |
+
"pad_token": "<pad>",
|
11 |
+
"sep_token": "</s>",
|
12 |
+
"special_tokens_map_file": null,
|
13 |
+
"tokenizer_class": "BartTokenizer",
|
14 |
+
"trim_offsets": true,
|
15 |
+
"unk_token": "<unk>"
|
16 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c99d1c899e011ca39c874bae2113d681be3cf63a441ae42e5bfae729953fe281
|
3 |
+
size 3707
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|