"""FastAPI service exposing the Greek NLP Toolkit (gr-nlp-toolkit).

Endpoints:
    POST /g2g  -- Greeklish-to-Greek transliteration
    POST /ner  -- Named Entity Recognition
    POST /pos  -- Part-of-Speech tagging + morphological features
    POST /dp   -- Dependency Parsing
    GET  /     -- redirect to the interactive Swagger docs
"""

from typing import Dict, List

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, Field

from gr_nlp_toolkit import Pipeline

app = FastAPI(
    title="The Grεεk NLP API 🇬🇷",
    description="State-of-the-art API for Greek NLP tasks including Greeklish to Greek conversion (G2G), Named Entity Recognition (NER), Part-of-Speech (POS) tagging, and Dependency Parsing (DP). The API is powered by the Grεεk NLP Toolkit ([https://github.com/nlpaueb/gr-nlp-toolkit/](https://github.com/nlpaueb/gr-nlp-toolkit/)), which is also available via PyPI (`pip install gr-nlp-toolkit`). ",
    version="1.0.0",
    contact={
        "name": "Natural Language Processing Group - Athens University of Economics and Business (AUEB)",
        "url": "http://nlp.cs.aueb.gr/",
        # NOTE(review): "api_author" is not a standard OpenAPI Contact field
        # (the spec defines only name/url/email); kept for backward compatibility.
        "api_author": "Lefteris Loukas",
    },
)

# Instantiate the pipeline ONCE at import time: loading the underlying models
# is expensive, and the single instance is shared by all endpoints.
nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")


def _annotate(text: str):
    """Run the shared NLP pipeline on ``text`` and return the annotated doc.

    Any pipeline failure is converted to an HTTP 500 response so that every
    endpoint reports errors consistently.

    Raises:
        HTTPException: with status 500 and the underlying error message.
    """
    try:
        return nlp_pos_ner_dp_with_g2g(text)
    except Exception as e:  # boundary: surface model/tokenizer errors as HTTP 500
        raise HTTPException(status_code=500, detail=str(e)) from e


# --------------------------------------------------------------------------- #
# Pydantic response models
# --------------------------------------------------------------------------- #
class G2GOutput(BaseModel):
    # Converted (transliterated) Greek text.
    greek_text: str = Field(
        ...,
        example="η θεσσαλονικη ειναι ωραια πολη",
        description="Converted Greek text",
    )


class NERItem(BaseModel):
    token: str = Field(..., example="αργεντινη")
    ner_value: str = Field(..., example="S-ORG")


class NERResponse(BaseModel):
    ner_results: List[NERItem] = Field(
        ...,
        description="Named Entity Recognition information",
        example=[
            {"token": "η", "ner_value": "O"},
            {"token": "αργεντινη", "ner_value": "S-ORG"},
            {"token": "κερδισε", "ner_value": "O"},
            {"token": "το", "ner_value": "O"},
            {"token": "παγκοσμιο", "ner_value": "B-EVENT"},
            {"token": "κυπελλο", "ner_value": "E-EVENT"},
            {"token": "το", "ner_value": "O"},
            {"token": "2022", "ner_value": "S-DATE"},
        ],
    )


class POSItem(BaseModel):
    token: str = Field(..., example="μου")
    upos: str = Field(..., example="PRON")
    morphological_features: Dict[str, str] = Field(
        ...,
        example={
            "Case": "Gen",
            "Gender": "Masc",
            "Number": "Sing",
            "Person": "1",
            "Poss": "_",
            "PronType": "Prs",
        },
    )


class POSResponse(BaseModel):
    pos_results: List[POSItem] = Field(
        ...,
        description="Part-of-Speech tagging information",
        example=[
            {
                "token": "μου",
                "upos": "PRON",
                "morphological_features": {
                    "Case": "Gen",
                    "Gender": "Masc",
                    "Number": "Sing",
                    "Person": "1",
                    "Poss": "_",
                    "PronType": "Prs",
                },
            },
            {
                "token": "αρεσει",
                "upos": "VERB",
                "morphological_features": {
                    "Aspect": "Imp",
                    "Case": "_",
                    "Gender": "_",
                    "Mood": "Ind",
                    "Number": "Sing",
                    "Person": "3",
                    "Tense": "Pres",
                    "VerbForm": "Fin",
                    "Voice": "Act",
                },
            },
            {
                "token": "να",
                "upos": "AUX",
                "morphological_features": {
                    "Aspect": "_",
                    "Mood": "_",
                    "Number": "_",
                    "Person": "_",
                    "Tense": "_",
                    "VerbForm": "_",
                    "Voice": "_",
                },
            },
            {
                "token": "διαβαζω",
                "upos": "VERB",
                "morphological_features": {
                    "Aspect": "Imp",
                    "Case": "_",
                    "Gender": "_",
                    "Mood": "Ind",
                    "Number": "Sing",
                    "Person": "1",
                    "Tense": "Pres",
                    "VerbForm": "Fin",
                    "Voice": "Act",
                },
            },
            {
                "token": "τα",
                "upos": "DET",
                "morphological_features": {
                    "Case": "Acc",
                    "Definite": "Def",
                    "Gender": "Neut",
                    "Number": "Plur",
                    "PronType": "Art",
                },
            },
            {
                "token": "post",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {
                "token": "του",
                "upos": "DET",
                "morphological_features": {
                    "Case": "Gen",
                    "Definite": "Def",
                    "Gender": "Masc",
                    "Number": "Sing",
                    "PronType": "Art",
                },
            },
            {
                "token": "andrew",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {
                "token": "ng",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {"token": "στο", "upos": "_", "morphological_features": {}},
            {
                "token": "twitter",
                "upos": "X",
                "morphological_features": {"Foreign": "Yes"},
            },
            {"token": ".", "upos": "PUNCT", "morphological_features": {}},
        ],
    )


class DPItem(BaseModel):
    token: str = Field(..., example="προτιμω")
    head: int = Field(..., example=0)
    deprel: str = Field(..., example="root")


class DPResponse(BaseModel):
    dp_results: List[DPItem] = Field(
        ...,
        description="Dependency Parsing information",
        example=[
            {"token": "προτιμω", "head": 0, "deprel": "root"},
            {"token": "την", "head": 4, "deprel": "det"},
            {"token": "πρωινη", "head": 4, "deprel": "amod"},
            {"token": "πτηση", "head": 1, "deprel": "obj"},
            {"token": "απο", "head": 7, "deprel": "case"},
            {"token": "την", "head": 7, "deprel": "det"},
            {"token": "αθηνα", "head": 4, "deprel": "nmod"},
            {"token": "στη", "head": 9, "deprel": "case"},
            {"token": "θεσσαλονικη", "head": 4, "deprel": "nmod"},
            {"token": ".", "head": 1, "deprel": "punct"},
        ],
    )


# --------------------------------------------------------------------------- #
# API endpoints
#
# NOTE: these are deliberately plain `def` (not `async def`): the NLP pipeline
# is synchronous and CPU-bound, so FastAPI runs each request in its threadpool
# instead of blocking the event loop for every concurrent caller.
# --------------------------------------------------------------------------- #
@app.post("/g2g", response_model=G2GOutput, summary="Convert Greeklish to Greek (G2G)")
def greeklish_to_greek(
    text: str = Query(
        ...,
        description="The Greeklish text to convert",
        example="H thessaloniki einai wraia polh",
    ),
) -> G2GOutput:
    """
    The G2G (Greeklish-to-Greek) endpoint takes Greeklish text (Greek written with Latin characters) as input and transliterates it to Greek text.
    """
    doc = _annotate(text)
    return G2GOutput(greek_text=" ".join(token.text for token in doc.tokens))


@app.post("/ner", response_model=NERResponse, summary="Named Entity Recognition (NER)")
def process_ner(
    text: str = Query(
        ...,
        description="The text to process for NER",
        example="Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022",
    ),
):
    """
    The NER endpoint takes Greek text as input and returns a list of dictionaries with the token and the NER value.

    Named Entity Recognition (NER) Labels:
    ```python
    ner_possible_labels = [
        'O', 'S-GPE', 'S-ORG', 'S-CARDINAL', 'B-ORG', 'E-ORG', 'B-DATE',
        'E-DATE', 'S-NORP', 'B-GPE', 'E-GPE', 'S-EVENT', 'S-DATE', 'S-PRODUCT',
        'S-LOC', 'I-ORG', 'S-PERSON', 'S-ORDINAL', 'B-PERSON', 'I-PERSON',
        'E-PERSON', 'B-LAW', 'I-LAW', 'E-LAW', 'B-MONEY', 'I-MONEY', 'E-MONEY',
        'B-EVENT', 'I-EVENT', 'E-EVENT', 'B-FAC', 'E-FAC', 'I-DATE',
        'S-PERCENT', 'B-QUANTITY', 'E-QUANTITY', 'B-WORK_OF_ART',
        'I-WORK_OF_ART', 'E-WORK_OF_ART', 'I-FAC', 'S-LAW', 'S-TIME', 'B-LOC',
        'E-LOC', 'I-LOC', 'S-FAC', 'B-TIME', 'E-TIME', 'S-WORK_OF_ART',
        'B-PRODUCT', 'E-PRODUCT', 'B-CARDINAL', 'E-CARDINAL', 'S-MONEY',
        'S-LANGUAGE', 'I-TIME', 'I-PRODUCT', 'I-GPE', 'I-QUANTITY', 'B-NORP',
        'E-NORP', 'S-QUANTITY', 'B-PERCENT', 'I-PERCENT', 'E-PERCENT',
        'I-CARDINAL', 'B-ORDINAL', 'I-ORDINAL', 'E-ORDINAL'
    ]
    ```
    """
    doc = _annotate(text)
    # One {"token", "ner_value"} dict per pipeline token.
    ner_list = [{"token": token.text, "ner_value": token.ner} for token in doc.tokens]
    return {"ner_results": ner_list}


@app.post("/pos", response_model=POSResponse, summary="Part-of-Speech Tagging (POS)")
def process_pos(
    text: str = Query(
        ...,
        description="The text to process for POS tagging",
        example="Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter.",
    ),
):
    """
    The POS Tagging endpoint analyzes the input text and provides Universal POS (UPOS) tags and detailed morphological features. It returns a list of dictionaries with "token", "upos", and "morphological_features" keys. The "morphological_features" key contains a dictionary itself with detailed morphological features.

    The UPOS and morphological features are based on the Universal Dependencies (UD) framework: [https://universaldependencies.org/u/pos/](https://universaldependencies.org/u/pos/)

    Complete list of the Universal POS (UPOS) tags and morphological features:
    ```python
    {'ADJ': ['Degree', 'Number', 'Gender', 'Case'],
     'ADP': ['Number', 'Gender', 'Case'],
     'ADV': ['Degree', 'Abbr'],
     'AUX': ['Mood', 'Aspect', 'Tense', 'Number', 'Person', 'VerbForm', 'Voice'],
     'CCONJ': [],
     'DET': ['Number', 'Gender', 'PronType', 'Definite', 'Case'],
     'NOUN': ['Number', 'Gender', 'Abbr', 'Case'],
     'NUM': ['NumType', 'Number', 'Gender', 'Case'],
     'PART': [],
     'PRON': ['Number', 'Gender', 'Person', 'Poss', 'PronType', 'Case'],
     'PROPN': ['Number', 'Gender', 'Case'],
     'PUNCT': [],
     'SCONJ': [],
     'SYM': [],
     'VERB': ['Mood', 'Aspect', 'Tense', 'Number', 'Gender', 'Person', 'VerbForm', 'Voice', 'Case'],
     'X': ['Foreign'],
    ```

    ```python
    {'Abbr': ['_', 'Yes'],
     'Aspect': ['Perf', '_', 'Imp'],
     'Case': ['Dat', '_', 'Acc', 'Gen', 'Nom', 'Voc'],
     'Definite': ['Ind', 'Def', '_'],
     'Degree': ['Cmp', 'Sup', '_'],
     'Foreign': ['_', 'Yes'],
     'Gender': ['Fem', 'Masc', '_', 'Neut'],
     'Mood': ['Ind', '_', 'Imp'],
     'NumType': ['Mult', 'Card', '_', 'Ord', 'Sets'],
     'Number': ['Plur', '_', 'Sing'],
     'Person': ['3', '1', '_', '2'],
     'Poss': ['_', 'Yes'],
     'PronType': ['Ind', 'Art', '_', 'Rel', 'Dem', 'Prs', 'Ind,Rel', 'Int'],
     'Tense': ['Pres', 'Past', '_'],
     'VerbForm': ['Part', 'Conv', '_', 'Inf', 'Fin'],
     'Voice': ['Pass', 'Act', '_'],
    ```
    """
    doc = _annotate(text)
    # One {"token", "upos", "morphological_features"} dict per pipeline token.
    pos_list = [
        {
            "token": token.text,
            "upos": token.upos,
            "morphological_features": token.feats,
        }
        for token in doc.tokens
    ]
    return {"pos_results": pos_list}


@app.post("/dp", response_model=DPResponse, summary="Dependency Parsing (DP)")
def process_dp(
    text: str = Query(
        ...,
        description="The text to process for Dependency Parsing",
        example="Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη",
    ),
):
    """
    The Dependency Parsing endpoint analyzes the syntactic structure of the input text. It provides the tokens' (syntactic) heads and dependency relations. A head value of 0 indicates the root. More specifically, the endpoint returns a list of dictionaries with "token", "head", and "deprel" keys.

    Dependency Parsing Labels:
    ```python
    dp_possible_labels = ['obl', 'obj', 'dep', 'mark', 'case', 'flat',
    'nummod', 'obl:arg', 'punct', 'cop', 'acl:relcl', 'expl', 'nsubj',
    'csubj:pass', 'root', 'advmod', 'nsubj:pass', 'ccomp', 'conj', 'amod',
    'xcomp', 'aux', 'appos', 'csubj', 'fixed', 'nmod', 'iobj', 'parataxis',
    'orphan', 'det', 'advcl', 'vocative', 'compound', 'cc', 'discourse',
    'acl', 'obl:agent']
    ```
    """
    doc = _annotate(text)
    # One {"token", "head", "deprel"} dict per pipeline token.
    dp_list = [
        {"token": token.text, "head": token.head, "deprel": token.deprel}
        for token in doc.tokens
    ]
    return {"dp_results": dp_list}


@app.get("/", include_in_schema=False)
async def root():
    """Redirect the bare root URL to the interactive Swagger UI."""
    return RedirectResponse(url="/docs#")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app)