Spaces:

AUEB-NLP
/

The-Greek-NLP-API

Sleeping

App Files Files Community

eloukas committed on Sep 1

Commit

7928a0f

•

1 Parent(s): d48b193

Add files

Browse files

Files changed (3) hide show

Dockerfile +16 -0
app.py +404 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create an unprivileged account (UID 1000, with a home directory); every
+# following instruction, and the container itself, runs as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin first on PATH.
+# NOTE(review): presumably this is where a non-root pip install places console
+# scripts such as uvicorn -- confirm.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# requirements.txt is copied and installed before the rest of the sources are
+# copied (NOTE(review): presumably so the dependency layer can be cached).
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from app.py on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,404 @@

+from typing import Dict, List
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import RedirectResponse
+from gr_nlp_toolkit import Pipeline
+from pydantic import BaseModel, Field
+app = FastAPI(
+    title="The Grεεk NLP API 🇬🇷",
+    description="State-of-the-art API for Greek NLP tasks including Greeklish to Greek conversion (G2G), Named Entity Recognition (NER), Part-of-Speech (POS) tagging, and Dependency Parsing (DP). Powered by the Grεεk NLP Toolkit, available on PyPI (`pip install gr-nlp-toolkit`).",
+    version="1.0.0",
+    contact={
+        "name": "Natural Language Processing Group - Athens University of Economics and Business (AUEB)",
+        "url": "http://nlp.cs.aueb.gr/",
+        "api_author": "Lefteris Loukas",
+    },
+)
+# Instantiate the Pipeline once, at import time; every endpoint in this module
+# calls this same object. The argument names the pos, ner, dp and g2g
+# components (NOTE(review): confirm the exact semantics of the processor
+# string against the gr-nlp-toolkit documentation).
+nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")
+# Pydantic models for responses
+class G2GOutput(BaseModel):
+    greek_text: str = Field(
+        ...,
+        example="η θεσσαλονικη ειναι ωραια πολη",
+        description="Converted Greek text",
+    )
+class NERItem(BaseModel):
+    token: str = Field(..., example="αργεντινη")
+    ner_value: str = Field(..., example="S-ORG")
+class POSItem(BaseModel):
+    token: str = Field(..., example="μου")
+    upos: str = Field(..., example="PRON")
+    morphological_features: Dict[str, str] = Field(
+        ...,
+        example={
+            "Case": "Gen",
+            "Gender": "Masc",
+            "Number": "Sing",
+            "Person": "1",
+            "Poss": "_",
+            "PronType": "Prs",
+        },
+    )
+class POSResponse(BaseModel):
+    pos_results: List[POSItem] = Field(
+        ...,
+        description="Part-of-Speech tagging information",
+        example=[
+            {
+                "token": "μου",
+                "upos": "PRON",
+                "morphological_features": {
+                    "Case": "Gen",
+                    "Gender": "Masc",
+                    "Number": "Sing",
+                    "Person": "1",
+                    "Poss": "_",
+                    "PronType": "Prs",
+                },
+            },
+            {
+                "token": "αρεσει",
+                "upos": "VERB",
+                "morphological_features": {
+                    "Aspect": "Imp",
+                    "Case": "_",
+                    "Gender": "_",
+                    "Mood": "Ind",
+                    "Number": "Sing",
+                    "Person": "3",
+                    "Tense": "Pres",
+                    "VerbForm": "Fin",
+                    "Voice": "Act",
+                },
+            },
+            {
+                "token": "να",
+                "upos": "AUX",
+                "morphological_features": {
+                    "Aspect": "_",
+                    "Mood": "_",
+                    "Number": "_",
+                    "Person": "_",
+                    "Tense": "_",
+                    "VerbForm": "_",
+                    "Voice": "_",
+                },
+            },
+            {
+                "token": "διαβαζω",
+                "upos": "VERB",
+                "morphological_features": {
+                    "Aspect": "Imp",
+                    "Case": "_",
+                    "Gender": "_",
+                    "Mood": "Ind",
+                    "Number": "Sing",
+                    "Person": "1",
+                    "Tense": "Pres",
+                    "VerbForm": "Fin",
+                    "Voice": "Act",
+                },
+            },
+            {
+                "token": "τα",
+                "upos": "DET",
+                "morphological_features": {
+                    "Case": "Acc",
+                    "Definite": "Def",
+                    "Gender": "Neut",
+                    "Number": "Plur",
+                    "PronType": "Art",
+                },
+            },
+            {
+                "token": "post",
+                "upos": "X",
+                "morphological_features": {"Foreign": "Yes"},
+            },
+            {
+                "token": "του",
+                "upos": "DET",
+                "morphological_features": {
+                    "Case": "Gen",
+                    "Definite": "Def",
+                    "Gender": "Masc",
+                    "Number": "Sing",
+                    "PronType": "Art",
+                },
+            },
+            {
+                "token": "andrew",
+                "upos": "X",
+                "morphological_features": {"Foreign": "Yes"},
+            },
+            {
+                "token": "ng",
+                "upos": "X",
+                "morphological_features": {"Foreign": "Yes"},
+            },
+            {"token": "στο", "upos": "_", "morphological_features": {}},
+            {
+                "token": "twitter",
+                "upos": "X",
+                "morphological_features": {"Foreign": "Yes"},
+            },
+            {"token": ".", "upos": "PUNCT", "morphological_features": {}},
+        ],
+    )
+class DPItem(BaseModel):
+    token: str = Field(..., example="προτιμω")
+    head: int = Field(..., example=0)
+    deprel: str = Field(..., example="root")
+class DPResponse(BaseModel):
+    dp_results: List[DPItem] = Field(
+        ...,
+        description="Dependency Parsing information",
+        example=[
+            {"token": "προτιμω", "head": 0, "deprel": "root"},
+            {"token": "την", "head": 4, "deprel": "det"},
+            {"token": "πρωινη", "head": 4, "deprel": "amod"},
+            {"token": "πτηση", "head": 1, "deprel": "obj"},
+            {"token": "απο", "head": 7, "deprel": "case"},
+            {"token": "την", "head": 7, "deprel": "det"},
+            {"token": "αθηνα", "head": 4, "deprel": "nmod"},
+            {"token": "στη", "head": 9, "deprel": "case"},
+            {"token": "θεσσαλονικη", "head": 4, "deprel": "nmod"},
+            {"token": ".", "head": 1, "deprel": "punct"},
+        ],
+    )
+# API endpoints
+@app.post("/g2g", response_model=G2GOutput, summary="Convert Greeklish to Greek")
+async def greeklish_to_greek(
+    text: str = Query(
+        ...,
+        description="The Greeklish text to convert",
+        example="H thessaloniki einai wraia polh",
+    ),
+):
+    """
+    Convert Greeklish (Greek written with Latin characters) to Greek.
+    This endpoint takes Greeklish text (Greek written with Latin characters) as input and returns the
+    transliterated Greek text.
+    """
+    try:
+        greek_text = " ".join(
+            [token.text for token in nlp_pos_ner_dp_with_g2g(text).tokens]
+        )
+        return G2GOutput(greek_text=greek_text)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+class NERResponse(BaseModel):
+    ner_results: List[NERItem] = Field(
+        ...,
+        description="Named Entity Recognition information",
+        example=[
+            {"token": "η", "ner_value": "O"},
+            {"token": "αργεντινη", "ner_value": "S-ORG"},
+            {"token": "κερδισε", "ner_value": "O"},
+            {"token": "το", "ner_value": "O"},
+            {"token": "παγκοσμιο", "ner_value": "B-EVENT"},
+            {"token": "κυπελλο", "ner_value": "E-EVENT"},
+            {"token": "το", "ner_value": "O"},
+            {"token": "2022", "ner_value": "S-DATE"},
+        ],
+    )
+# @app.post("/ner", response_model=List[NERItem], summary="Named Entity Recognition")
+@app.post("/ner", response_model=NERResponse, summary="Named Entity Recognition")
+async def process_ner(
+    text: str = Query(
+        ...,
+        description="The text to process for NER",
+        example="Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022",
+    ),
+):
+    """
+    The NER endpoint takes Greek text as input and returns a list of dictionaries with the token and the NER value.
+    Named Entity Recognition (NER) Labels:
+    ```python
+        ner_possible_labels = [
+            'O', 'S-GPE', 'S-ORG', 'S-CARDINAL', 'B-ORG', 'E-ORG', 'B-DATE', 'E-DATE', 'S-NORP',
+            'B-GPE', 'E-GPE', 'S-EVENT', 'S-DATE', 'S-PRODUCT', 'S-LOC', 'I-ORG', 'S-PERSON',
+            'S-ORDINAL', 'B-PERSON', 'I-PERSON', 'E-PERSON', 'B-LAW', 'I-LAW', 'E-LAW', 'B-MONEY',
+            'I-MONEY', 'E-MONEY', 'B-EVENT', 'I-EVENT', 'E-EVENT', 'B-FAC', 'E-FAC', 'I-DATE',
+            'S-PERCENT', 'B-QUANTITY', 'E-QUANTITY', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'E-WORK_OF_ART',
+            'I-FAC', 'S-LAW', 'S-TIME', 'B-LOC', 'E-LOC', 'I-LOC', 'S-FAC', 'B-TIME', 'E-TIME',
+            'S-WORK_OF_ART', 'B-PRODUCT', 'E-PRODUCT', 'B-CARDINAL', 'E-CARDINAL', 'S-MONEY',
+            'S-LANGUAGE', 'I-TIME', 'I-PRODUCT', 'I-GPE', 'I-QUANTITY', 'B-NORP', 'E-NORP',
+            'S-QUANTITY', 'B-PERCENT', 'I-PERCENT', 'E-PERCENT', 'I-CARDINAL', 'B-ORDINAL',
+            'I-ORDINAL', 'E-ORDINAL'
+        ]
+    ```
+    """
+    try:
+        doc = nlp_pos_ner_dp_with_g2g(text)
+        # Create a list of dictionaries, each with "token" and "ner_value"
+        ner_list = [
+            {"token": token.text, "ner_value": token.ner} for token in doc.tokens
+        ]
+        return {"ner_results": ner_list}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# @app.post("/pos", response_model=List[POSItem], summary="Part-of-Speech Tagging")
+@app.post("/pos", response_model=POSResponse, summary="Part-of-Speech Tagging")
+async def process_pos(
+    text: str = Query(
+        ...,
+        description="The text to process for POS tagging",
+        example="Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter.",
+    ),
+):
+    """
+    The POS Tagging endpoint analyzes the input text and provides Universal POS (UPOS) tags and detailed morphological features.
+    It returns a list of dictionaries with "token", "upos", and "morphological_features" keys.
+    The "morphological_features" key contains a dictionary itself with detailed morphological features.
+    The UPOS and morphological features are based on the Universal Dependencies (UD) framework: [https://universaldependencies.org/u/pos/](https://universaldependencies.org/u/pos/)
+    Complete list of the Universal POS (UPOS) tags and morphological features:
+    ```python
+        {'ADJ': ['Degree', 'Number', 'Gender', 'Case'],
+        'ADP': ['Number', 'Gender', 'Case'],
+        'ADV': ['Degree', 'Abbr'],
+        'AUX': ['Mood',
+                'Aspect',
+                'Tense',
+                'Number',
+                'Person',
+                'VerbForm',
+                'Voice'],
+        'CCONJ': [],
+        'DET': ['Number', 'Gender', 'PronType', 'Definite', 'Case'],
+        'NOUN': ['Number', 'Gender', 'Abbr', 'Case'],
+        'NUM': ['NumType', 'Number', 'Gender', 'Case'],
+        'PART': [],
+        'PRON': ['Number', 'Gender', 'Person', 'Poss', 'PronType', 'Case'],
+        'PROPN': ['Number', 'Gender', 'Case'],
+        'PUNCT': [],
+        'SCONJ': [],
+        'SYM': [],
+        'VERB': ['Mood',
+                'Aspect',
+                'Tense',
+                'Number',
+                'Gender',
+                'Person',
+                'VerbForm',
+                'Voice',
+                'Case'],
+        'X': ['Foreign'],
+    ```
+    ```python
+        {'Abbr': ['_', 'Yes'],
+        'Aspect': ['Perf', '_', 'Imp'],
+        'Case': ['Dat', '_', 'Acc', 'Gen', 'Nom', 'Voc'],
+        'Definite': ['Ind', 'Def', '_'],
+        'Degree': ['Cmp', 'Sup', '_'],
+        'Foreign': ['_', 'Yes'],
+        'Gender': ['Fem', 'Masc', '_', 'Neut'],
+        'Mood': ['Ind', '_', 'Imp'],
+        'NumType': ['Mult', 'Card', '_', 'Ord', 'Sets'],
+        'Number': ['Plur', '_', 'Sing'],
+        'Person': ['3', '1', '_', '2'],
+        'Poss': ['_', 'Yes'],
+        'PronType': ['Ind', 'Art', '_', 'Rel', 'Dem', 'Prs', 'Ind,Rel', 'Int'],
+        'Tense': ['Pres', 'Past', '_'],
+        'VerbForm': ['Part', 'Conv', '_', 'Inf', 'Fin'],
+        'Voice': ['Pass', 'Act', '_'],
+    ```
+    """
+    try:
+        doc = nlp_pos_ner_dp_with_g2g(text)
+        # Create a list of dictionaries, each with "token", "upos", and "morphological_features"
+        pos_list = [
+            {
+                "token": token.text,
+                "upos": token.upos,
+                "morphological_features": token.feats,
+            }
+            for token in doc.tokens
+        ]
+        # return pos_list
+        return {"pos_results": pos_list}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# @app.post("/dp", response_model=List[DPItem], summary="Dependency Parsing")
+@app.post("/dp", response_model=DPResponse, summary="Dependency Parsing")
+async def process_dp(
+    text: str = Query(
+        ...,
+        description="The text to process for Dependency Parsing",
+        example="Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη",
+    ),
+):
+    """
+    The Dependency Parsing endpoint analyzes the syntactic structure of the input text.
+    It provides the tokens' (syntactic) heads and dependency relations. A head value of 0 indicates the root.
+    More specifically, the endpoint returns a list of dictionaries with "token", "head", and "deprel" keys.
+    Dependency Parsing Labels:
+    ```python
+        dp_possible_labels = ['obl', 'obj', 'dep', 'mark', 'case', 'flat', 'nummod', 'obl:arg', 'punct', 'cop',
+        'acl:relcl', 'expl', 'nsubj', 'csubj:pass', 'root', 'advmod', 'nsubj:pass', 'ccomp',
+        'conj', 'amod', 'xcomp', 'aux', 'appos', 'csubj', 'fixed', 'nmod', 'iobj', 'parataxis',
+        'orphan', 'det', 'advcl', 'vocative', 'compound', 'cc', 'discourse', 'acl', 'obl:agent']
+    ```
+    """
+    try:
+        doc = nlp_pos_ner_dp_with_g2g(text)
+        # Create a list of dictionaries, each with "token", "head", and "deprel"
+        dp_list = [
+            {"token": token.text, "head": token.head, "deprel": token.deprel}
+            for token in doc.tokens
+        ]
+        return {"dp_results": dp_list}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/", include_in_schema=False)
+async def root():
+    return RedirectResponse(url="/docs#")
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+# NOTE(review): gr-nlp-toolkit is the only dependency listed without a version
+# pin; the other three are pinned to exact versions.
+fastapi==0.112.2
+gr-nlp-toolkit
+pydantic==2.8.2
+uvicorn==0.30.6