mtc committed on
Commit: cadb0dd
Parent: e34bc0e

Update README.md

Files changed (1)
  README.md +5 -4
README.md CHANGED

@@ -58,13 +58,13 @@ def predict_with_vllm(prompts: List[str], model_name: str, max_context_length: i
     return predictions


-def predict_with_hf_generation_pipeline(prompts: List[str], model_name: str, max_new_tokens: int = 256,
-                                        batch_size: int = 2):
+def predict_with_hf_generation_pipeline(prompts: List[str], model_name: str, max_context_length: int = 4096,
+                                        batch_size: int = 2) -> List[str]:
     text_generation_pipeline = pipeline("text-generation", model=model_name,
                                         model_kwargs={"torch_dtype": torch.float16}, device_map="auto",
                                         batch_size=batch_size)

-    batch_output = text_generation_pipeline(prompts, truncation=True, max_new_tokens=max_new_tokens,
+    batch_output = text_generation_pipeline(prompts, truncation=True, max_length=max_context_length,
                                             return_full_text=False)
     predictions = [result[0]['generated_text'] for result in batch_output]
     return predictions
@@ -89,7 +89,8 @@ Satz: {sentence}
 ### Erklärung und Label:"""

 prompts = generate_prompts_for_generation(prompt_template=prompt_template, article=article, summary_sentences=summary_sentences)
-predictions = predict_with_hf_generation_pipeline(prompts=prompts, model_name=model_name, max_context_length=max_context_length)
+predictions = predict_with_hf_generation_pipeline(prompts=prompts, model_name=model_name,
+                                                  max_context_length=max_context_length, batch_size=batch_size)
 print(predictions)

 # Uncomment the following lines to use vllm for prediction
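In effect, the commit replaces the helper's `max_new_tokens` argument (which caps only the newly generated tokens) with `max_context_length`, forwarded to the pipeline as `max_length` (an overall budget for prompt plus generation, with `truncation=True` so over-long prompts are trimmed), and the call site now also forwards `batch_size`. Below is a minimal, self-contained sketch of the function as of this commit; the imports and the commented-out call are assumptions based on the surrounding README, not part of the diff itself.

```python
# Sketch of the updated helper; imports are assumed from the full README.
from typing import List

import torch
from transformers import pipeline


def predict_with_hf_generation_pipeline(prompts: List[str], model_name: str,
                                         max_context_length: int = 4096,
                                         batch_size: int = 2) -> List[str]:
    # Build a Hugging Face text-generation pipeline with fp16 weights,
    # automatic device placement, and batched inference.
    text_generation_pipeline = pipeline("text-generation", model=model_name,
                                        model_kwargs={"torch_dtype": torch.float16},
                                        device_map="auto", batch_size=batch_size)
    # max_length bounds prompt + generated tokens; truncation trims long prompts.
    batch_output = text_generation_pipeline(prompts, truncation=True,
                                            max_length=max_context_length,
                                            return_full_text=False)
    return [result[0]["generated_text"] for result in batch_output]


# Hypothetical call, mirroring the updated call site in the README
# (prompts, model_name, max_context_length, batch_size defined elsewhere):
# predictions = predict_with_hf_generation_pipeline(prompts=prompts, model_name=model_name,
#                                                   max_context_length=max_context_length,
#                                                   batch_size=batch_size)
```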