Spaces:

Aekanun
/

Thai-HandWriting-to-Text

Running on Zero

App Files Community

Aekanun committed 12 days ago

Commit

f9d68b0

•

1 Parent(s): 79ec84c

fixing app.py

Browse files

Files changed (1) hide show

app.py +13 -14

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ processor = None
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
-    print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
@@ -32,12 +32,11 @@ def load_model_and_processor():
     """โหลดโมเดลและ processor"""
     global model, processor
     print("กำลังโหลดโมเดลและ processor...")
     try:
         # Model paths
         base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
         # BitsAndBytes config
         bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -45,10 +44,11 @@ def load_model_and_processor():
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         # Load processor from base model
-        processor = AutoProcessor.from_pretrained(base_model_path)
         # Load model from Hub
         print("กำลังโหลดโมเดลจาก Hub...")
         model = AutoModelForVision2Seq.from_pretrained(
@@ -56,10 +56,10 @@ def load_model_and_processor():
             device_map="auto",
             torch_dtype=torch.bfloat16,
             quantization_config=bnb_config,
-            trust_remote_code=True
         )
         print("โหลดโมเดลสำเร็จ!")
         return True
     except Exception as e:
         print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
@@ -80,7 +80,7 @@ def process_handwriting(image):
         # Create prompt
         prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
         # Create model inputs
         messages = [
             {
@@ -91,12 +91,12 @@ Only return the transcription in Thai language."""
                 ],
             }
         ]
         # Process with model
         text = processor.apply_chat_template(messages, tokenize=False)
         inputs = processor(text=text, images=image, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
         # Generate
         with torch.no_grad():
             outputs = model.generate(
@@ -105,11 +105,10 @@ Only return the transcription in Thai language."""
                 do_sample=False,
                 pad_token_id=processor.tokenizer.pad_token_id
             )
         # Decode output
         transcription = processor.decode(outputs[0], skip_special_tokens=True)
         return transcription.strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
@@ -125,7 +124,7 @@ if load_model_and_processor():
         description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
         examples=[["example1.jpg"], ["example2.jpg"]]
     )
     if __name__ == "__main__":
         demo.launch()
 else:

 if torch.cuda.is_available():
     torch.cuda.empty_cache()
     gc.collect()
+print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
 # Login to Hugging Face Hub
 if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
     """โหลดโมเดลและ processor"""
     global model, processor
     print("กำลังโหลดโมเดลและ processor...")
     try:
         # Model paths
         base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
         hub_model_path = "Aekanun/thai-handwriting-llm"
         # BitsAndBytes config
         bnb_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.bfloat16
         )
         # Load processor from base model
+        print("กำลังโหลด processor...")
+        processor = AutoProcessor.from_pretrained(base_model_path, use_auth_token=True)
         # Load model from Hub
         print("กำลังโหลดโมเดลจาก Hub...")
         model = AutoModelForVision2Seq.from_pretrained(
             device_map="auto",
             torch_dtype=torch.bfloat16,
             quantization_config=bnb_config,
+            trust_remote_code=True,
+            use_auth_token=True
         )
         print("โหลดโมเดลสำเร็จ!")
         return True
     except Exception as e:
         print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
         # Create prompt
         prompt = """Transcribe the Thai handwritten text from the provided image.
 Only return the transcription in Thai language."""
         # Create model inputs
         messages = [
             {
                 ],
             }
         ]
         # Process with model
         text = processor.apply_chat_template(messages, tokenize=False)
         inputs = processor(text=text, images=image, return_tensors="pt")
         inputs = {k: v.to(model.device) for k, v in inputs.items()}
         # Generate
         with torch.no_grad():
             outputs = model.generate(
                 do_sample=False,
                 pad_token_id=processor.tokenizer.pad_token_id
             )
         # Decode output
         transcription = processor.decode(outputs[0], skip_special_tokens=True)
         return transcription.strip()
     except Exception as e:
         return f"เกิดข้อผิดพลาด: {str(e)}"
         description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
         examples=[["example1.jpg"], ["example2.jpg"]]
     )
     if __name__ == "__main__":
         demo.launch()
 else: