"""Streamlit app that extracts keywords from pasted patent text.

The page shows a text area and an "Extract Keywords" button.  When the button
is pressed, YAKE proposes candidate phrases and KeyBERT (using the
PatentSBERTa sentence-embedding model) scores keyphrases for the text; the
result is rendered as a two-column table (keyword, score).
"""

import pandas as pd
import streamlit as st
import yake
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from sklearn.feature_extraction.text import CountVectorizer

# Sentence-embedding model handed to KeyBERT.
MODEL_NAME = "AI-Growth-Lab/PatentSBERTa"
# Maximum number of YAKE candidates and of KeyBERT results.
TOP_N = 50
# Keyphrases may be one to three words long.
NGRAM_RANGE = (1, 3)

# Stop words, whitespace-separated, exactly as supplied (mixed case kept so the
# provenance of each entry stays visible; see the normalisation step below).
_RAW_STOPWORDS = """
a abaft abafter abaftest ability able about abouter aboutest above abovementioned abover
abovest abroad absorbs abst abstr abstract Academic acceptable accepted accessed
accommodate accommodated accommodates accommodating accompanied accompanies accompany
accompanying accomplish accomplished accomplishes accomplishing accomplishment accordance
according accordingly achieve achieved acquire acquisition across act action activated
active activities activity actually adapted add added addition additional additionally
addon adhere adhering adj adjective adjust adjustable adjusted advance advanced
advancements advantage advantageous advantageously advantages adverb aer aest affected
affecting affects afore aforementioned aforesaid after afterer afterest afterward
afterwards again against agent ago agree ah ahead aid ain aint al albeit all aller allest
allow allowed allowing allows alls allyou almost alone along alongside already also
altered alternative alternatives although always am amid amidst among amongst amoungst
amount ampfax amptrademark amptradmark an and andor anear
anent annual another answer any anybody anyhow anymore anyone anything anyway anyways
anywhere apart aparter apartest apparatus apparently appear appeared appearing appears
appl applicable application applications applied apply applying appreciate appropriate
appropriated appropriater appropriates appropriatest appropriating approximately apr
april arch archive archives are area areas aren arent ares arise around arrange arranged
arrangement arriv articles as ases aside asides ask asked asking asks aslant Assigned
assisting associated astraddle astraddler astraddlest astride astrider astridest at
athwart atleast atop attached attributed atween aught aughts auth available availabler
availablest away awfully based be became because become becomes becoming becominger
becomingest becomings been before beforehand beforehander beforehandest began begin
beginnings begins behind behinds being beings believe below beneath beside besides best
better bettered bettering betters between betwixt beyond bist block both bottom br brief
briefly bringing but buts by byandby ca call called came can cannot cant cants case cases
cause causes cer certain certainer certainest certainly cest changes characterised
characterized characterizing chemical chez circa claim Claimed claiming Claims clear
clearly cmon co com
combined come comeon comeons comes Compared composition compound comprises comprising con
concerns concerning concerninger concerningest configured consequently consider
considering consisting consists constitute constituting contain containing contains corp
corresponding corresponds could couldnt couldst course cum currently dare date dday ddays
de dear define defined defining definitely describe described describes describing
description designated desired despite despited despites despiting detail determined
determining diagram did didnt differ Difference different differenter differentest
differently directly disadvantage disclosed disclosure do doe does doesnt doing doings
done doner dones donest dont dos dost doth down downed downing downs downward downwarder
downwardest downwards drawback due during each earlier early ed edu effect eg eight
eighth eighty either eleven else elsewhere embodiment embodiments empty end ended ending
ends enough entirely EP ere especially et etc even evened evenest evenly evens evenser
evensest ever evermore every everybody everyone everything everywhere ex exactly example
examples except excepted excepting excepts executed exes Experiment Experiments faces
facilitating fact facts failing failings fairly far farther felt few fewer fewest ff
fifteen fifth fify fig
figupon figuponed figuponing figupons fill find finds fire first five fix followed
following follows followthrough for forby forbye fore foregoing forer fores forever form
formed former formerer formerest formerly formers formula formulae fornenst forth forty
forward forwhy found four fourth frae from front fs full fully further furthered
furtherer furtherest furthering furthermore furthers gave general generally get gets
getting give given gives giving go goes going gone good goods got gotta gotten great
greater gt had hadnt hadst hae half happens hardly has hasnt hast hath have havent haves
having he hed held hello help hence her here hereafter hereafters hereby herein heres
hereupon herewith hers herse herself hes hi hid high higher highest him himse himself his
hither hitherer hitherest hoo hoos hopefully how howbeit howdoyoudo however http huh
humph hundred i id idem idemer idemest identifying ie if ifs ignored illustrates im
immediate immediately immediater immediatest importance important in inasmuch inc include
included includes including indeed independently indicate indicated indicates indicating
info inner inside insofar instead interest interested interesting interests into
Introduced Invention Investigated involves involving inwarder inwardest
inwards is isnt it itd its itse itself just keep keeps kept keys kg kind km knew know
known knows large largely last late lately later latest latter latterer latterest
latterly latters layabout layabouts least led less lest let lets like liked likely
likewise line links little llc llp long longer longest look looking looks lot lots lotted
lotting low lower lt ltd made main mainly maintain maintained maintaining maintains make
makes making man many may maybe mayest me mean means meantime meanwhile meanwhiles member
members men merely method methods mg midst midsts might mights mill mine minus miss ml
more moreover most mostly move moved mr mrs much mucher muchest mug must musth musths
musts my myse myself na name namely natheless nathless nay nd near nearly neath neaths
necessarier necessariest necessarily necessary need needed needing neednt needs neither
net nethe nethermost never neverf neverless nevertheless new newer newest next nigh
nigher nighest nine ninety ninth no nobodies nobody noes non none nonetheless noone nor
normally nos not noted nothing nothings notwithstanding novel now nowhere nowheres number
numbers obtain obtained obtaining obviously of off offest offs often oftener oftenest oh
ok okay
old older oldest omitted on once one ones oneself onest only ons onto open opened opening
opens opposite or ord order ordered ordering orders orer orest org other others otherwise
otherwiser otherwisest ought oughts our ours ourself ourselves out outcome outed outest
outs outside outwith over overall overaller overallest overalls overcome overcoming overs
owing own owned owning owns owt page pages part parted particular particularer
particularest particularly particulars parting parts past per perhaps perspective place
placed places plaintiff please pleased pleases plenties plenty plurality plus point
pointed pointing points poorly possible possibly potentially pp preceding preceeding
predominantly prepared present presented presenting presents presumably previously
primarily prior pro probably problem problems proceding proceeding process processes
promptly propose proud provide provided provides providing put puts qua que quickly quite
qv ran rath rathe rather rathest rd re readily really reasonably received receiving
recent recently ref refs regard regarding regardless regards relate related relates
relating relatively Relevant representative required res research respecting respectively
resulted resulting results right role room rooms round run said saider saidest same samer
sames samest sans sanserif sanserifs sanses saved saw say sayid saying says sayyid scope
sec second secondly section see seeing seem seemed seeming seeminger seemingest seemings
seems seen sees selected selecting self selves send sent senza separately seriouser
seriousest seriously seven seventh several severaler severalest sFIG shall shalled
shalling shalls she shed shes shortcoming should shoulded shoulding shouldnt shoulds show
Showed showing shown showns shows side sides significant significantly similar similarly
since sincere sine sines sith six sixth sixty slightly small smaller smallest so sobeit
soer soest some somebody someday somehow someone somethan something sometime sometimer
sometimes sometimest somewhat somewhere soon sorry specialized specifically specified
specify specifying state states steps still stop stopped strongly sub subject
substantially successfully such sufficiently suggest suitable suitably suited summat sup
supped supping sups sure syn syne system systems take taken taking techniques tell ten
tends tenth testify th than thank thanks thanx that thats the thee their theirs them
themselves then thence thener thenest there thereafter thereby thered therefore therein
thereof therer therere theres therest thereto thereupon therewith these they
theyd theyre thick thin thine thing things think thinks third thirty this thises thorough
thorougher thoroughest thoroughly those thou though thoughh thought thoughts thous
thousand thouses three thro throug through througher throughest throughout thru thruer
thruest thus thy thyself til till tilled tilling tills times tis to today together too
took top toward towarder towardest towards tried tries truly try trying ts turn turned
turning turns twas twelve twenty twice two typically umpteen un under underneath undoing
unfortunately unless unlike unlikely unliker unlikest until unto up upon uponed uponing
upons upped upping upwards US use used usedest useful usefully usefulness User username
uses using usually utilizing uucp value various variouser variousest verier veriest
versus very via view vis viser visest viz vol vols vs want wanted wanting wants was wasnt
wast way ways we wed welcome well went were werent wert what whatever whateverer
whateverest whats whatsoever whatsoeverer whatsoeverest wheen when whenas whence
whencesoever whenever whensoever where whereafter whereas whereby wherefrom wherein
whereinto whereof whereon wheres wheresoever whereto whereupon wherever wherewith
wherewithal whether which whichever whichsoever while whiles whilst whim
whither whithersoever who whod whoever whole whom whomever whos whose whoso whosoever why
widely will willing wish with withal within without WO wonder wont words work worked
working works would wouldnt woulds written www ye year years yes yet yon yond yonder you
youd your yourabout youre yours yourself yourselves zero commonly study enables
considered proposed scenarios result capable development
"""

# CountVectorizer lower-cases the text before tokenising by default, so the
# capitalised entries above ("Claims", "EP", "US", ...) could never match a
# token.  Normalise to lower case and drop duplicates so every entry is
# effective; sorting keeps the list deterministic.
patseer_stopwords = sorted({word.lower() for word in _RAW_STOPWORDS.split()})

# ---------------------------------------------------------------------------
# Page layout
# ---------------------------------------------------------------------------
st.title("Patent Text Extractor")
placeholder = st.empty()
text = placeholder.text_area("Paste or write text", height=300)
button = st.button("Extract Keywords")

# ---------------------------------------------------------------------------
# Extraction: only when the user asks for it.  Previously the button value was
# never read, so the model was loaded and the whole pipeline ran on every
# script rerun, including the initial page load with an empty text area.
# ---------------------------------------------------------------------------
if button:
    if not text.strip():
        st.warning("Please paste or write some text first.")
    else:
        # NOTE(review): the model is still re-created on every click; consider
        # wrapping this in Streamlit's resource cache if the installed
        # Streamlit version provides one.
        kw_model = KeyBERT(model=MODEL_NAME)

        # NOTE(review): KeyphraseCountVectorizer(stop_words=patseer_stopwords)
        # was tried here as an alternative to the plain n-gram vectorizer.
        vectorizer = CountVectorizer(
            ngram_range=NGRAM_RANGE,
            stop_words=patseer_stopwords,
        )

        # YAKE returns (phrase, score) pairs; only the phrases are kept.
        kw_extractor = yake.KeywordExtractor(top=TOP_N, stopwords=patseer_stopwords)
        candidates = [phrase for phrase, _score in kw_extractor.extract_keywords(text)]

        # NOTE(review): the KeyBERT documentation appears to say that
        # `candidates`, `keyphrase_ngram_range` and `stop_words` are not used
        # when a `vectorizer` is passed -- confirm whether the YAKE candidates
        # are meant to restrict the result.  The call is kept as it was.
        try:
            keywords = kw_model.extract_keywords(
                text,
                candidates,
                keyphrase_ngram_range=NGRAM_RANGE,
                stop_words=patseer_stopwords,
                top_n=TOP_N,
                vectorizer=vectorizer,
            )
        except ValueError:
            # scikit-learn raises ValueError when fitting yields an empty
            # vocabulary (e.g. the text consists only of stop words).
            keywords = []

        if keywords:
            st.info("Extracted keywords")
            keywords = pd.DataFrame(keywords, columns=["Keyword", "Score"])
            st.table(keywords)
        else:
            st.warning("No keywords could be extracted from this text.")