|
[ |
|
[ |
|
"Number of Documents of Each Topic", |
|
{ |
|
"type": "pie", |
|
"kwargs": { |
|
"x": [ |
|
535838, |
|
206990, |
|
368022, |
|
200460, |
|
435310, |
|
250450, |
|
933732, |
|
271801, |
|
639890, |
|
387594, |
|
271359, |
|
1473798, |
|
459519, |
|
1101903, |
|
31659, |
|
2254859, |
|
591041 |
|
], |
|
"labels": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"autopct": "%1.1f%%", |
|
"colors": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
], |
|
"pctdistance": 1.2, |
|
"labeldistance": 1.5 |
|
}, |
|
"comment": "As shown in the graph above, over 20% of the documents are related to Business & Economics & Finance, which makes it the largest topic group in the dataset. In contrast, Culture & Cultural geography contains the smallest number of documents among all topics." |
|
} |
|
], |
|
[ |
|
"Fraction of Words Corrected in Lines", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.005599317351029421, |
|
0.005491440909735792, |
|
0.010611897213357221, |
|
0.0061721529486005915, |
|
0.005040363960665401, |
|
0.0042498218252128035, |
|
0.008174887952855342, |
|
0.005098232906967347, |
|
0.005905725848762689, |
|
0.008048438948020924, |
|
0.005920233062429675, |
|
0.00738773833987446, |
|
0.006788916830535338, |
|
0.007824824620615435, |
|
0.007817009319252808, |
|
0.006894261391191716, |
|
0.007759051322619051 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents related to Shopping & Commodity have the largest fraction of words corrected in lines." |
|
} |
|
], |
|
[ |
|
"Fraction of Lines Ending with Ellipsis", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.013608683903284204, |
|
0.01187771888948645, |
|
0.010704198151112872, |
|
0.013181499370177098, |
|
0.012342863597933462, |
|
0.01669603038717465, |
|
0.013958760786106517, |
|
0.011481605295821474, |
|
0.011727508302172751, |
|
0.013890752469918237, |
|
0.012950109439490815, |
|
0.015828153615401713, |
|
0.011233498318616135, |
|
0.013063106813702607, |
|
0.013101045053120094, |
|
0.012854514904197168, |
|
0.014225441730661032 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Compared with other topics, documents in Personal Development & Human Resources & Career on average contain a larger fraction of lines ending with an ellipsis." |
|
} |
|
], |
|
[ |
|
"Fraction of Lines Starting with Bullet Point", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.05958846605103529, |
|
0.06540916901994907, |
|
0.10871161367473074, |
|
0.057639202535687495, |
|
0.05391125998418046, |
|
0.048856823399157104, |
|
0.0919025139411848, |
|
0.06361059519326412, |
|
0.08348033701472354, |
|
0.09887120370776314, |
|
0.0654760782941809, |
|
0.07275273301463199, |
|
0.08648053868877607, |
|
0.0728023788334523, |
|
0.059507615068158916, |
|
0.08230576538579888, |
|
0.06015758928408362 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Documents related to Shopping & Commodity have the highest fraction of lines starting with a bullet point." |
|
} |
|
], |
|
[ |
|
"Number of Lines with Toxic Words", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.18993986988604764, |
|
0.8879124595391081, |
|
0.25990565781393504, |
|
0.26195250922877383, |
|
0.25880866508924677, |
|
1.059369135555999, |
|
0.13686689542609656, |
|
0.41953855946078195, |
|
0.8275813030364594, |
|
0.15215921815095176, |
|
0.13490615752563948, |
|
0.7103062970637767, |
|
0.10924031432867846, |
|
0.983178192635831, |
|
0.1341482674752835, |
|
0.14871528552339636, |
|
0.44260888838506973 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents in Personal Development & Human Resources & Career have the most lines with toxic words." |
|
} |
|
], |
|
[ |
|
"Number of Toxic Words", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.2548288848495254, |
|
1.5926663123822407, |
|
0.4235181592404802, |
|
0.38067444876783396, |
|
0.32550136684202063, |
|
2.0770772609303254, |
|
0.20720185235163838, |
|
0.590086129190106, |
|
1.571774836299989, |
|
0.20227609302517582, |
|
0.18648727331689754, |
|
1.453566228207665, |
|
0.15104924932374938, |
|
2.337839174591593, |
|
0.20351242932499447, |
|
0.24778267732040007, |
|
0.7902395942075084 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents in Daily Life & Home & Lifestyle contain the most toxic words." |
|
} |
|
], |
|
[ |
|
"Word Count", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
524.2469683001206, |
|
634.8099570027538, |
|
332.5969724636027, |
|
654.5120023944927, |
|
634.4970021364086, |
|
747.0358714314234, |
|
624.2853688210322, |
|
570.2685052667209, |
|
746.3173279782462, |
|
427.6056492102561, |
|
603.5602799243807, |
|
470.1159928294108, |
|
559.1577497339609, |
|
450.07929463845727, |
|
682.6580435263274, |
|
559.7302638435485, |
|
514.1515901604118 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents in the topic of Personal Development & Human Resources & Career contain more words than those in other topics." |
|
} |
|
], |
|
[ |
|
"Mean Word Length", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
4.851116616082301, |
|
5.17314698008811, |
|
4.951553714759433, |
|
4.8636771295932055, |
|
5.165523097115738, |
|
4.64498800138652, |
|
5.233981234962708, |
|
5.094122002544284, |
|
5.191578081429402, |
|
4.872407702558401, |
|
5.077044932121297, |
|
4.911569182027774, |
|
5.25771470252484, |
|
4.990336313339119, |
|
5.138998450653204, |
|
5.165914329275205, |
|
4.943227231080612 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Mean word length varies little across topic groups (roughly 4.6 to 5.3 characters). However, Education-related documents contain the longest words on average." |
|
} |
|
], |
|
[ |
|
"Number of Sentences", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
23.816802839664227, |
|
28.88356925455336, |
|
17.167653020743327, |
|
32.65256909109049, |
|
26.743545978727802, |
|
41.80899580754642, |
|
28.010818950191275, |
|
25.435358957472562, |
|
35.18096235290441, |
|
22.968376703457743, |
|
28.56101327024348, |
|
22.844366731397383, |
|
26.802678452904015, |
|
22.603309910218957, |
|
30.825168198616506, |
|
26.01027691753675, |
|
27.965867004150304 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents in the topic of Personal Development & Human Resources & Career contain more sentences than those in other topics." |
|
} |
|
], |
|
[ |
|
"Symbol to Word Ratio", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.0029508316364481296, |
|
0.002339527014691741, |
|
0.002746622681352375, |
|
0.0031207893125393786, |
|
0.0024594503072570637, |
|
0.003732116125668388, |
|
0.0029521717963945683, |
|
0.002009846839273012, |
|
0.0023335875319153666, |
|
0.0032912280108721562, |
|
0.0026740153080243275, |
|
0.0037401276658117497, |
|
0.0022685436825723537, |
|
0.0034624173472893424, |
|
0.0022837896768252673, |
|
0.002565854536163215, |
|
0.0035536009817103663 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "On average, documents related to Entertainment & Travel & Hobby have the highest symbol-to-word ratio, narrowly ahead of Personal Development & Human Resources & Career." |
|
} |
|
], |
|
[ |
|
"Fraction of Words with Alpha Character", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.9554513833362817, |
|
0.9672667625084445, |
|
0.945038227724378, |
|
0.9650443058450766, |
|
0.9662993498435797, |
|
0.9795101768513954, |
|
0.949647348401343, |
|
0.9644024275136092, |
|
0.9651040360235426, |
|
0.9515637138100507, |
|
0.9638773263904938, |
|
0.9544175710037947, |
|
0.9602638724414636, |
|
0.9533095901329957, |
|
0.9536863995733356, |
|
0.9573400271816177, |
|
0.9613916720605239 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "The fraction of words with an alphabetic character is relatively consistent across topics (about 0.95 to 0.98)." |
|
} |
|
], |
|
[ |
|
"Number of Stop Words", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
106.35206163056745, |
|
139.14766896951542, |
|
61.915719712408496, |
|
150.11113937942733, |
|
141.82980634490363, |
|
156.21242563385906, |
|
122.75635942647355, |
|
122.98374178167114, |
|
152.60597915266686, |
|
83.42474857711936, |
|
128.65106740517174, |
|
93.49815985637109, |
|
114.57335387655353, |
|
86.25348147704472, |
|
162.30932752139992, |
|
114.21801717978818, |
|
106.4132116046095 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Culture & Cultural geography contains more stop words in average." |
|
} |
|
], |
|
[ |
|
"Has Curly Bracket", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.005337434075224228, |
|
0.0067539494661577855, |
|
0.01028199401122759, |
|
0.009842362566097974, |
|
0.011575658725965403, |
|
0.00931123976841685, |
|
0.02773600990434086, |
|
0.006582021405366427, |
|
0.009203144290424917, |
|
0.01040779785032792, |
|
0.008158933368710822, |
|
0.007557345036429687, |
|
0.010752547772779798, |
|
0.011963847997509762, |
|
0.012824157427587732, |
|
0.009383291815585807, |
|
0.008704979857573332 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Natural Science & Formal Science & Technology has a significantly higher rate in percentage of documents that contain curly bracket. It might be related to the coding data." |
|
} |
|
], |
|
[ |
|
"Number of Document Duplication", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
12.971086410444947, |
|
7.7029131842117975, |
|
6.170992495013885, |
|
7.104888755861518, |
|
8.650198708966025, |
|
6.623561589139549, |
|
6.508078335111145, |
|
9.093410252353744, |
|
6.089149697604276, |
|
7.057779532190901, |
|
7.702302116384568, |
|
6.5227466722033824, |
|
6.954972482095409, |
|
6.535254918082626, |
|
9.99308253577182, |
|
5.590145547903439, |
|
6.865564317873041 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Sports related documents have a higher number of duplication count." |
|
} |
|
], |
|
[ |
|
"Number of Dump Duplication", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
3.8719109133730716, |
|
3.26455384318083, |
|
2.2721848150382313, |
|
3.265644018756859, |
|
3.444853093197951, |
|
3.1923417847873825, |
|
2.7517906637022187, |
|
3.3698330764051643, |
|
2.710181437434559, |
|
3.1639266861716124, |
|
3.206342888940481, |
|
2.7590002157690536, |
|
2.8303421621304015, |
|
2.6106544768459656, |
|
3.9413752803310276, |
|
2.4888664878823907, |
|
3.094817449212491 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "In average, Culture & Cultural geography related documents are duplicated across a higher number of common crawl dumps. Duplication of Shopping & Commodity appears in less dumps than others." |
|
} |
|
], |
|
[ |
|
"Number of Year Duplication", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
1.4484135130393887, |
|
1.4291463355717668, |
|
1.229893865040677, |
|
1.4503641624264192, |
|
1.452872665456801, |
|
1.442735076861649, |
|
1.3276539735170263, |
|
1.4222795353953812, |
|
1.328097016674741, |
|
1.406236938652301, |
|
1.4158586964132385, |
|
1.3305229074812153, |
|
1.3344910656577857, |
|
1.2914712093532734, |
|
1.5438579866704571, |
|
1.2835525414227675, |
|
1.404339123681775 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "In average, Culture & Cultural geography related documents are duplicated across more years than other topics." |
|
} |
|
], |
|
[ |
|
"Maximum Span of Year Duplication", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
1.4810222492619038, |
|
1.4615875163051355, |
|
1.2437028221138953, |
|
1.4792776613788288, |
|
1.49291539362753, |
|
1.4697025354362148, |
|
1.3531216666024084, |
|
1.4549983259811405, |
|
1.3486599259247682, |
|
1.432375114165854, |
|
1.4460216908228583, |
|
1.352992743917416, |
|
1.3557807185339452, |
|
1.309416527589089, |
|
1.582425218737168, |
|
1.3025537295236642, |
|
1.4325029904862776 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "In average, Culture & Cultural geography related documents are duplicated across a wider span of years." |
|
} |
|
], |
|
[ |
|
"Language Score", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.9413726660750416, |
|
0.9347130364355554, |
|
0.8847180050664069, |
|
0.9336572405289453, |
|
0.9420075430577804, |
|
0.9522977107155225, |
|
0.8831956938165678, |
|
0.9481278901144439, |
|
0.9241279717677588, |
|
0.9066709862541587, |
|
0.9270825804900252, |
|
0.9117954084131167, |
|
0.921528771738386, |
|
0.8992133008305735, |
|
0.9224377655046135, |
|
0.9152426551412108, |
|
0.9178671893764959 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Average language scores of different topic groups are mostly consistent. No significant differences are obeserved." |
|
} |
|
], |
|
[ |
|
"Fraction of Duplicate Lines", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.01235014200385377, |
|
0.012056246914591116, |
|
0.018525258494333133, |
|
0.012726935235443207, |
|
0.01165333793386552, |
|
0.010444387257042395, |
|
0.016149995700960602, |
|
0.012705431934865763, |
|
0.01519943556613772, |
|
0.014809953345215319, |
|
0.012686293057054212, |
|
0.01603496888664195, |
|
0.01596207137084465, |
|
0.016014032499666292, |
|
0.013610478505124169, |
|
0.01580386009616988, |
|
0.015060041023072804 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "In average, Shopping & Commodity has a larger fraction of duplicate lines than others." |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate Lines", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.00501345725991589, |
|
0.004299959504074716, |
|
0.0073601226054879785, |
|
0.004651424152553605, |
|
0.00450348053495509, |
|
0.003909584113418541, |
|
0.0063485903557626774, |
|
0.00521503913729261, |
|
0.005782503341128245, |
|
0.005962335751386622, |
|
0.004749891704712697, |
|
0.006420052544922626, |
|
0.0063561887111620065, |
|
0.006466672218067342, |
|
0.004978436253072214, |
|
0.006108371322424041, |
|
0.0057332990952240126 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
}, |
|
"comment": "Shopping & Commodity usually has a larger fraction of characters in duplicate lines than others." |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Most Common Bigram", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.02614438964212445, |
|
0.02549244163757135, |
|
0.03593997020714517, |
|
0.026520648762908574, |
|
0.023796693998532542, |
|
0.019517664362790295, |
|
0.03146900938445295, |
|
0.026900122790576828, |
|
0.027486920194835916, |
|
0.029735671266585457, |
|
0.02724062185263462, |
|
0.030402249730981233, |
|
0.03031798250174187, |
|
0.034936591389516845, |
|
0.02730012031746535, |
|
0.029329317288923955, |
|
0.028440195287636943 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Most Common 3-gram", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.025877442339206684, |
|
0.026073122171118526, |
|
0.03794965393907832, |
|
0.02756936824343807, |
|
0.024589084236341825, |
|
0.019970321326854976, |
|
0.031104349287997282, |
|
0.027138921074492478, |
|
0.02674544851177018, |
|
0.03082668946385283, |
|
0.027642774270487825, |
|
0.031311152209273344, |
|
0.030596143210215625, |
|
0.0352048856850328, |
|
0.028135774349846692, |
|
0.029182052353507664, |
|
0.03023015052666528 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Most Common 4-gram", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.026649882448510714, |
|
0.026904261739744192, |
|
0.04086332064828129, |
|
0.0286899321496711, |
|
0.025495383586610822, |
|
0.020748509542508307, |
|
0.03171918073481819, |
|
0.027563495776633813, |
|
0.02693813171261885, |
|
0.03243470539147362, |
|
0.0287992899739741, |
|
0.032589127100319, |
|
0.031139178077804624, |
|
0.03630423964958027, |
|
0.029809457289325606, |
|
0.029522356378146167, |
|
0.032375986416410006 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 5-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.016989093741368057, |
|
0.01874268974292254, |
|
0.02001249239006167, |
|
0.01893345653851295, |
|
0.017576185062959156, |
|
0.013966567341084396, |
|
0.026648000310062814, |
|
0.021239561601745963, |
|
0.022547189937081085, |
|
0.016903077473431387, |
|
0.018277127900190513, |
|
0.019079382613460993, |
|
0.023467347573746446, |
|
0.021192854307303135, |
|
0.019157826340526964, |
|
0.021183180653813184, |
|
0.016589870490142093 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 6-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.011946923836249373, |
|
0.013297904416841108, |
|
0.014258151562338789, |
|
0.013153098583726782, |
|
0.012601072651000291, |
|
0.009837626317910313, |
|
0.01949595975959962, |
|
0.014924056163499448, |
|
0.015889641140216917, |
|
0.011840004108930956, |
|
0.012940087820238557, |
|
0.013424858515603134, |
|
0.016468963654372438, |
|
0.014839192401791004, |
|
0.01388493355575309, |
|
0.01498376261489033, |
|
0.011812586464028073 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 7-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.009095872215498261, |
|
0.010146405377015994, |
|
0.010487557518535542, |
|
0.009868429864354638, |
|
0.009802808055168035, |
|
0.007541438580109868, |
|
0.015129318997269138, |
|
0.011302686364124783, |
|
0.011969695536420487, |
|
0.00892604076557906, |
|
0.009759568234633746, |
|
0.010070709856859254, |
|
0.012419860047704056, |
|
0.011070038486862109, |
|
0.010547925069683646, |
|
0.011327481696653985, |
|
0.008957792606056945 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 8-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.0073128979182685684, |
|
0.008109486840255576, |
|
0.00813393016693516, |
|
0.007792984494504855, |
|
0.008002590936702558, |
|
0.006117199534770664, |
|
0.012284551039331444, |
|
0.009023639757214827, |
|
0.009496488981527608, |
|
0.007086539674993228, |
|
0.0076650824522217454, |
|
0.007877075837565403, |
|
0.00987496717434344, |
|
0.008652258777583252, |
|
0.008392133389867372, |
|
0.00901948167673584, |
|
0.007053229339496676 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 9-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.006148310898840968, |
|
0.00676529697013875, |
|
0.00643290688721836, |
|
0.006434352312383364, |
|
0.0067735701471297, |
|
0.005172565516416477, |
|
0.010288525380088334, |
|
0.007482544617336476, |
|
0.00780204339328974, |
|
0.005852660603046196, |
|
0.006240040171999708, |
|
0.006465362460507409, |
|
0.008165651028577293, |
|
0.006986331812620781, |
|
0.006928899525750178, |
|
0.007487698229960434, |
|
0.005728220555701086 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
], |
|
[ |
|
"Fraction of Characters in Duplicate 10-grams", |
|
{ |
|
"type": "barh", |
|
"kwargs": { |
|
"y": [ |
|
"Sports", |
|
"Society & Social Issues & Human Rights", |
|
"Shopping & Commodity", |
|
"Religion & Spirituality", |
|
"Politics & Government", |
|
"Personal Development & Human Resources & Career", |
|
"Natural Science & Formal Science & Technology", |
|
"Law & Justice", |
|
"Health & Wellness & Medicine", |
|
"Food & Drink & Cooking", |
|
"Environment", |
|
"Entertainment & Travel & Hobby", |
|
"Education", |
|
"Daily Life & Home & Lifestyle", |
|
"Culture & Cultural geography", |
|
"Business & Economics & Finance", |
|
"Arts" |
|
], |
|
"width": [ |
|
0.005325364079705381, |
|
0.005797357629820572, |
|
0.005283647214644124, |
|
0.005467491111249268, |
|
0.005879825006312822, |
|
0.004529332536092203, |
|
0.008882676950579147, |
|
0.006399831899960353, |
|
0.006645377495475746, |
|
0.005021569571200667, |
|
0.005306020206939719, |
|
0.00550360123328725, |
|
0.007013864844056383, |
|
0.005835955446724545, |
|
0.005845555947354781, |
|
0.00641447975612288, |
|
0.004809876486057196 |
|
], |
|
"color": [ |
|
[ |
|
1.0, |
|
0.4980392156862745, |
|
0.054901960784313725, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.7333333333333333, |
|
0.47058823529411764, |
|
1.0 |
|
], |
|
[ |
|
0.17254901960784313, |
|
0.6274509803921569, |
|
0.17254901960784313, |
|
1.0 |
|
], |
|
[ |
|
0.596078431372549, |
|
0.8745098039215686, |
|
0.5411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.8392156862745098, |
|
0.15294117647058825, |
|
0.1568627450980392, |
|
1.0 |
|
], |
|
[ |
|
1.0, |
|
0.596078431372549, |
|
0.5882352941176471, |
|
1.0 |
|
], |
|
[ |
|
0.5803921568627451, |
|
0.403921568627451, |
|
0.7411764705882353, |
|
1.0 |
|
], |
|
[ |
|
0.7725490196078432, |
|
0.6901960784313725, |
|
0.8352941176470589, |
|
1.0 |
|
], |
|
[ |
|
0.5490196078431373, |
|
0.33725490196078434, |
|
0.29411764705882354, |
|
1.0 |
|
], |
|
[ |
|
0.7686274509803922, |
|
0.611764705882353, |
|
0.5803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.8901960784313725, |
|
0.4666666666666667, |
|
0.7607843137254902, |
|
1.0 |
|
], |
|
[ |
|
0.9686274509803922, |
|
0.7137254901960784, |
|
0.8235294117647058, |
|
1.0 |
|
], |
|
[ |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
0.4980392156862745, |
|
1.0 |
|
], |
|
[ |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
0.7803921568627451, |
|
1.0 |
|
], |
|
[ |
|
0.7372549019607844, |
|
0.7411764705882353, |
|
0.13333333333333333, |
|
1.0 |
|
], |
|
[ |
|
0.8588235294117647, |
|
0.8588235294117647, |
|
0.5529411764705883, |
|
1.0 |
|
], |
|
[ |
|
0.09019607843137255, |
|
0.7450980392156863, |
|
0.8117647058823529, |
|
1.0 |
|
] |
|
] |
|
}, |
|
"x_label": "Metrics", |
|
"subplots_adjust": { |
|
"left": 0.37, |
|
"right": 0.98 |
|
} |
|
} |
|
] |
|
] |