[ [ "Number of Document of Each Topic", { "type": "pie", "kwargs": { "x": [ 535838, 206990, 368022, 200460, 435310, 250450, 933732, 271801, 639890, 387594, 271359, 1473798, 459519, 1101903, 31659, 2254859, 591041 ], "labels": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "autopct": "%1.1f%%", "colors": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ], "pctdistance": 1.2, "labeldistance": 1.5 }, "comment": "As shown in the graph above, over 20% of the documents are related to 
Business & Economics & Finance, which makes it the largest topic group in the dataset. In contrast, the group of Culture & Cultural geography contains the smallest number of documents among all topics." } ], [ "Fraction of Words Corrected in Lines", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.005599317351029421, 0.005491440909735792, 0.010611897213357221, 0.0061721529486005915, 0.005040363960665401, 0.0042498218252128035, 0.008174887952855342, 0.005098232906967347, 0.005905725848762689, 0.008048438948020924, 0.005920233062429675, 0.00738773833987446, 0.006788916830535338, 0.007824824620615435, 0.007817009319252808, 0.006894261391191716, 0.007759051322619051 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 
0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "On average, documents related to Shopping & Commodity have a larger fraction of words corrected in lines than other topics." } ], [ "Fraction of Lines Ending with Ellipsis", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.013608683903284204, 0.01187771888948645, 0.010704198151112872, 0.013181499370177098, 0.012342863597933462, 0.01669603038717465, 0.013958760786106517, 0.011481605295821474, 0.011727508302172751, 0.013890752469918237, 0.012950109439490815, 0.015828153615401713, 0.011233498318616135, 0.013063106813702607, 0.013101045053120094, 0.012854514904197168, 0.014225441730661032 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 
0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Compared with other topics, documents in Personal Development & Human Resources & Career on average contain a larger fraction of lines ending with an ellipsis." } ], [ "Fraction of Lines Starting with Bullet Point", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.05958846605103529, 0.06540916901994907, 0.10871161367473074, 0.057639202535687495, 0.05391125998418046, 0.048856823399157104, 0.0919025139411848, 0.06361059519326412, 0.08348033701472354, 0.09887120370776314, 0.0654760782941809, 0.07275273301463199, 0.08648053868877607, 0.0728023788334523, 0.059507615068158916, 0.08230576538579888, 0.06015758928408362 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 
0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Shopping & Commodity related documents have higher percentage of lines starting with bullet point." 
} ], [ "Number of Lines with Toxic Words", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.18993986988604764, 0.8879124595391081, 0.25990565781393504, 0.26195250922877383, 0.25880866508924677, 1.059369135555999, 0.13686689542609656, 0.41953855946078195, 0.8275813030364594, 0.15215921815095176, 0.13490615752563948, 0.7103062970637767, 0.10924031432867846, 0.983178192635831, 0.1341482674752835, 0.14871528552339636, 0.44260888838506973 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 
0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Personal Development & Human Resources & Career in average has more lines with toxic words." } ], [ "Number of Toxic Words", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.2548288848495254, 1.5926663123822407, 0.4235181592404802, 0.38067444876783396, 0.32550136684202063, 2.0770772609303254, 0.20720185235163838, 0.590086129190106, 1.571774836299989, 0.20227609302517582, 0.18648727331689754, 1.453566228207665, 0.15104924932374938, 2.337839174591593, 0.20351242932499447, 0.24778267732040007, 0.7902395942075084 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 
0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Daily Life & Home & Lifestyle in average has more toxic words." } ], [ "Word Count", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 524.2469683001206, 634.8099570027538, 332.5969724636027, 654.5120023944927, 634.4970021364086, 747.0358714314234, 624.2853688210322, 570.2685052667209, 746.3173279782462, 427.6056492102561, 603.5602799243807, 470.1159928294108, 559.1577497339609, 450.07929463845727, 682.6580435263274, 559.7302638435485, 514.1515901604118 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 
0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Documents in the topic of Personal Development & Human Resources & Career in average contain more words than other topics." } ], [ "Mean Word Length", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 4.851116616082301, 5.17314698008811, 4.951553714759433, 4.8636771295932055, 5.165523097115738, 4.64498800138652, 5.233981234962708, 5.094122002544284, 5.191578081429402, 4.872407702558401, 5.077044932121297, 4.911569182027774, 5.25771470252484, 4.990336313339119, 5.138998450653204, 5.165914329275205, 4.943227231080612 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 
0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "There is no significant variance in the average word length for different topic groups. However, Education related data contain longer words than others in general." } ], [ "Number of Sentences", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 23.816802839664227, 28.88356925455336, 17.167653020743327, 32.65256909109049, 26.743545978727802, 41.80899580754642, 28.010818950191275, 25.435358957472562, 35.18096235290441, 22.968376703457743, 28.56101327024348, 22.844366731397383, 26.802678452904015, 22.603309910218957, 30.825168198616506, 26.01027691753675, 27.965867004150304 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 
0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Documents in the topic of Personal Development & Human Resources & Career usually contain more sentences." 
} ], [ "Symbol to Word Ratio", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.0029508316364481296, 0.002339527014691741, 0.002746622681352375, 0.0031207893125393786, 0.0024594503072570637, 0.003732116125668388, 0.0029521717963945683, 0.002009846839273012, 0.0023335875319153666, 0.0032912280108721562, 0.0026740153080243275, 0.0037401276658117497, 0.0022685436825723537, 0.0034624173472893424, 0.0022837896768252673, 0.002565854536163215, 0.0035536009817103663 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 
0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Documents related to Entertainment & Travel & Hobby and Personal Development & Human Resources & Career have the highest symbol-to-word ratios, with Arts and Daily Life & Home & Lifestyle also above most other topics." } ], [ "Fraction of Words with Alpha Character", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.9554513833362817, 0.9672667625084445, 0.945038227724378, 0.9650443058450766, 0.9662993498435797, 0.9795101768513954, 0.949647348401343, 0.9644024275136092, 0.9651040360235426, 0.9515637138100507, 0.9638773263904938, 0.9544175710037947, 0.9602638724414636, 0.9533095901329957, 0.9536863995733356, 0.9573400271816177, 0.9613916720605239 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 
0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "The fraction of words with alpha character seems to be relatively consistent across different topics." } ], [ "Number of Stop Words", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 106.35206163056745, 139.14766896951542, 61.915719712408496, 150.11113937942733, 141.82980634490363, 156.21242563385906, 122.75635942647355, 122.98374178167114, 152.60597915266686, 83.42474857711936, 128.65106740517174, 93.49815985637109, 114.57335387655353, 86.25348147704472, 162.30932752139992, 114.21801717978818, 106.4132116046095 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 
0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Culture & Cultural geography contains more stop words in average." } ], [ "Has Curly Bracket", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.005337434075224228, 0.0067539494661577855, 0.01028199401122759, 0.009842362566097974, 0.011575658725965403, 0.00931123976841685, 0.02773600990434086, 0.006582021405366427, 0.009203144290424917, 0.01040779785032792, 0.008158933368710822, 0.007557345036429687, 0.010752547772779798, 0.011963847997509762, 0.012824157427587732, 0.009383291815585807, 0.008704979857573332 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 
0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Natural Science & Formal Science & Technology has a significantly higher percentage of documents that contain curly brackets. This might be related to code in the data." 
} ], [ "Number of Document Duplication", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 12.971086410444947, 7.7029131842117975, 6.170992495013885, 7.104888755861518, 8.650198708966025, 6.623561589139549, 6.508078335111145, 9.093410252353744, 6.089149697604276, 7.057779532190901, 7.702302116384568, 6.5227466722033824, 6.954972482095409, 6.535254918082626, 9.99308253577182, 5.590145547903439, 6.865564317873041 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 
0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Sports related documents have a higher duplication count than documents of other topics." } ], [ "Number of Dump Duplication", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 3.8719109133730716, 3.26455384318083, 2.2721848150382313, 3.265644018756859, 3.444853093197951, 3.1923417847873825, 2.7517906637022187, 3.3698330764051643, 2.710181437434559, 3.1639266861716124, 3.206342888940481, 2.7590002157690536, 2.8303421621304015, 2.6106544768459656, 3.9413752803310276, 2.4888664878823907, 3.094817449212491 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 
0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "On average, Culture & Cultural geography related documents are duplicated across a higher number of Common Crawl dumps. Duplication of Shopping & Commodity appears in fewer dumps than others." } ], [ "Number of Year Duplication", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 1.4484135130393887, 1.4291463355717668, 1.229893865040677, 1.4503641624264192, 1.452872665456801, 1.442735076861649, 1.3276539735170263, 1.4222795353953812, 1.328097016674741, 1.406236938652301, 1.4158586964132385, 1.3305229074812153, 1.3344910656577857, 1.2914712093532734, 1.5438579866704571, 1.2835525414227675, 1.404339123681775 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 
0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "On average, Culture & Cultural geography related documents are duplicated across more years than other topics." } ], [ "Maximum Span of Year Duplication", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 1.4810222492619038, 1.4615875163051355, 1.2437028221138953, 1.4792776613788288, 1.49291539362753, 1.4697025354362148, 1.3531216666024084, 1.4549983259811405, 1.3486599259247682, 1.432375114165854, 1.4460216908228583, 1.352992743917416, 1.3557807185339452, 1.309416527589089, 1.582425218737168, 1.3025537295236642, 1.4325029904862776 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 
0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "On average, Culture & Cultural geography related documents are duplicated across a wider span of years." 
} ], [ "Language Score", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.9413726660750416, 0.9347130364355554, 0.8847180050664069, 0.9336572405289453, 0.9420075430577804, 0.9522977107155225, 0.8831956938165678, 0.9481278901144439, 0.9241279717677588, 0.9066709862541587, 0.9270825804900252, 0.9117954084131167, 0.921528771738386, 0.8992133008305735, 0.9224377655046135, 0.9152426551412108, 0.9178671893764959 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 
0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Average language scores of different topic groups are mostly consistent. No significant differences are observed." } ], [ "Fraction of Duplicate Lines", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.01235014200385377, 0.012056246914591116, 0.018525258494333133, 0.012726935235443207, 0.01165333793386552, 0.010444387257042395, 0.016149995700960602, 0.012705431934865763, 0.01519943556613772, 0.014809953345215319, 0.012686293057054212, 0.01603496888664195, 0.01596207137084465, 0.016014032499666292, 0.013610478505124169, 0.01580386009616988, 0.015060041023072804 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 
0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "On average, Shopping & Commodity has a larger fraction of duplicate lines than others." } ], [ "Fraction of Characters in Duplicate Lines", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.00501345725991589, 0.004299959504074716, 0.0073601226054879785, 0.004651424152553605, 0.00450348053495509, 0.003909584113418541, 0.0063485903557626774, 0.00521503913729261, 0.005782503341128245, 0.005962335751386622, 0.004749891704712697, 0.006420052544922626, 0.0063561887111620065, 0.006466672218067342, 0.004978436253072214, 0.006108371322424041, 0.0057332990952240126 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 
0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 }, "comment": "Shopping & Commodity usually has a larger fraction of characters in duplicate lines than others." } ], [ "Fraction of Characters in Most Common Bigram", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.02614438964212445, 0.02549244163757135, 0.03593997020714517, 0.026520648762908574, 0.023796693998532542, 0.019517664362790295, 0.03146900938445295, 0.026900122790576828, 0.027486920194835916, 0.029735671266585457, 0.02724062185263462, 0.030402249730981233, 0.03031798250174187, 0.034936591389516845, 0.02730012031746535, 0.029329317288923955, 0.028440195287636943 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 
0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Most Common 3-gram", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.025877442339206684, 0.026073122171118526, 0.03794965393907832, 0.02756936824343807, 0.024589084236341825, 0.019970321326854976, 0.031104349287997282, 0.027138921074492478, 0.02674544851177018, 0.03082668946385283, 0.027642774270487825, 0.031311152209273344, 0.030596143210215625, 0.0352048856850328, 0.028135774349846692, 0.029182052353507664, 0.03023015052666528 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 
], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Most Common 4-gram", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.026649882448510714, 0.026904261739744192, 0.04086332064828129, 0.0286899321496711, 0.025495383586610822, 0.020748509542508307, 0.03171918073481819, 0.027563495776633813, 0.02693813171261885, 0.03243470539147362, 0.0287992899739741, 0.032589127100319, 
0.031139178077804624, 0.03630423964958027, 0.029809457289325606, 0.029522356378146167, 0.032375986416410006 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 5-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.016989093741368057, 0.01874268974292254, 0.02001249239006167, 0.01893345653851295, 
0.017576185062959156, 0.013966567341084396, 0.026648000310062814, 0.021239561601745963, 0.022547189937081085, 0.016903077473431387, 0.018277127900190513, 0.019079382613460993, 0.023467347573746446, 0.021192854307303135, 0.019157826340526964, 0.021183180653813184, 0.016589870490142093 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 6-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", 
"Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.011946923836249373, 0.013297904416841108, 0.014258151562338789, 0.013153098583726782, 0.012601072651000291, 0.009837626317910313, 0.01949595975959962, 0.014924056163499448, 0.015889641140216917, 0.011840004108930956, 0.012940087820238557, 0.013424858515603134, 0.016468963654372438, 0.014839192401791004, 0.01388493355575309, 0.01498376261489033, 0.011812586464028073 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 7-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & 
Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.009095872215498261, 0.010146405377015994, 0.010487557518535542, 0.009868429864354638, 0.009802808055168035, 0.007541438580109868, 0.015129318997269138, 0.011302686364124783, 0.011969695536420487, 0.00892604076557906, 0.009759568234633746, 0.010070709856859254, 0.012419860047704056, 0.011070038486862109, 0.010547925069683646, 0.011327481696653985, 0.008957792606056945 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 8-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & 
Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.0073128979182685684, 0.008109486840255576, 0.00813393016693516, 0.007792984494504855, 0.008002590936702558, 0.006117199534770664, 0.012284551039331444, 0.009023639757214827, 0.009496488981527608, 0.007086539674993228, 0.0076650824522217454, 0.007877075837565403, 0.00987496717434344, 0.008652258777583252, 0.008392133389867372, 0.00901948167673584, 0.007053229339496676 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", 
"subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 9-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.006148310898840968, 0.00676529697013875, 0.00643290688721836, 0.006434352312383364, 0.0067735701471297, 0.005172565516416477, 0.010288525380088334, 0.007482544617336476, 0.00780204339328974, 0.005852660603046196, 0.006240040171999708, 0.006465362460507409, 0.008165651028577293, 0.006986331812620781, 0.006928899525750178, 0.007487698229960434, 0.005728220555701086 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 
1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ], [ "Fraction of Characters in Duplicate 10-grams", { "type": "barh", "kwargs": { "y": [ "Sports", "Society & Social Issues & Human Rights", "Shopping & Commodity", "Religion & Spirituality", "Politics & Government", "Personal Development & Human Resources & Career", "Natural Science & Formal Science & Technology", "Law & Justice", "Health & Wellness & Medicine", "Food & Drink & Cooking", "Environment", "Entertainment & Travel & Hobby", "Education", "Daily Life & Home & Lifestyle", "Culture & Cultural geography", "Business & Economics & Finance", "Arts" ], "width": [ 0.005325364079705381, 0.005797357629820572, 0.005283647214644124, 0.005467491111249268, 0.005879825006312822, 0.004529332536092203, 0.008882676950579147, 0.006399831899960353, 0.006645377495475746, 0.005021569571200667, 0.005306020206939719, 0.00550360123328725, 0.007013864844056383, 0.005835955446724545, 0.005845555947354781, 0.00641447975612288, 0.004809876486057196 ], "color": [ [ 1.0, 0.4980392156862745, 0.054901960784313725, 1.0 ], [ 1.0, 0.7333333333333333, 0.47058823529411764, 1.0 ], [ 0.17254901960784313, 0.6274509803921569, 0.17254901960784313, 1.0 ], [ 0.596078431372549, 0.8745098039215686, 0.5411764705882353, 1.0 ], [ 0.8392156862745098, 0.15294117647058825, 0.1568627450980392, 1.0 ], [ 1.0, 0.596078431372549, 0.5882352941176471, 1.0 ], [ 0.5803921568627451, 0.403921568627451, 0.7411764705882353, 1.0 ], [ 0.7725490196078432, 0.6901960784313725, 0.8352941176470589, 1.0 ], [ 0.5490196078431373, 0.33725490196078434, 0.29411764705882354, 1.0 ], [ 0.7686274509803922, 0.611764705882353, 0.5803921568627451, 1.0 ], [ 0.8901960784313725, 0.4666666666666667, 0.7607843137254902, 1.0 ], [ 0.9686274509803922, 0.7137254901960784, 0.8235294117647058, 1.0 ], [ 0.4980392156862745, 
0.4980392156862745, 0.4980392156862745, 1.0 ], [ 0.7803921568627451, 0.7803921568627451, 0.7803921568627451, 1.0 ], [ 0.7372549019607844, 0.7411764705882353, 0.13333333333333333, 1.0 ], [ 0.8588235294117647, 0.8588235294117647, 0.5529411764705883, 1.0 ], [ 0.09019607843137255, 0.7450980392156863, 0.8117647058823529, 1.0 ] ] }, "x_label": "Metrics", "subplots_adjust": { "left": 0.37, "right": 0.98 } } ] ]