Top 200 @Mentions and #Hashtags used in tweets from 2017 Altmetric.com dump as Word Cloud using Python

As a follow-up to the Twitter profile description word cloud, I’ve created a hashtag word cloud from the 19.2 million hashtags used in the tweets collected by Altmetric.com.

Top 200 Hashtags used in tweets collected by Altmetric.com
Top 200 @Mentions used in tweets collected by Altmetric.com

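The Python code is VERY similar to the profile description word cloud code; however, we have to turn off the ‘collocations’ option in the WordCloud module options to make it work as we expect (otherwise pairs of adjacent hashtags can be merged into two-word phrases instead of being counted individually).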

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 10 16:19:56 2019

@author: tdbowman
"""
import io
import csv
import numpy as np
from wordcloud import WordCloud, STOPWORDS
from os import path
from PIL import Image


# Absolute directory containing this script. Resolving it once, up front,
# keeps the mask and output paths valid even when the script is launched via
# a relative path or the working directory changes later.
currdir = path.dirname(path.abspath(__file__))

# from https://github.com/nikhilkumarsingh/wordcloud-example/blob/master/mywc.py
def create_wordcloud(text, output_name="wc_hashtags.png", mask_name="cloud.png",
                     max_words=200):
    """Render ``text`` as a masked word cloud and save it as an image.

    Both the mask image and the generated image are resolved relative to
    ``currdir`` (the directory of this script).

    Parameters
    ----------
    text : str
        The text whose words are plotted; it is handed unchanged to
        ``WordCloud.generate``.
    output_name : str, optional
        File name of the generated image. Defaults to ``"wc_hashtags.png"``.
    mask_name : str, optional
        File name of the image used as the drawing mask. Defaults to
        ``"cloud.png"``.
    max_words : int, optional
        Upper bound on the number of words drawn. Defaults to 200.

    Returns
    -------
    None
        The result is written to disk via ``WordCloud.to_file``.
    """
    # Load the mask image into an array; the context manager releases the
    # underlying file handle as soon as the pixel data has been copied.
    with Image.open(path.join(currdir, mask_name)) as mask_image:
        mask = np.array(mask_image)

    # collocations=False keeps each term separate instead of also counting
    # two-word phrases. No width/height are passed: WordCloud ignores them
    # when a mask is supplied and uses the mask's shape instead.
    wc = WordCloud(collocations=False,
                   background_color="white",
                   max_words=max_words,
                   mask=mask)

    # generate wordcloud
    wc.generate(text)
    # save wordcloud
    wc.to_file(path.join(currdir, output_name))
    
if __name__ == "__main__":

    # Read hashtags.csv (looked up relative to the working directory) and
    # join the first column of every row into one comma-separated string.
    # newline='' is the csv-module-recommended way to open files for the
    # reader, so line endings inside quoted fields are handled correctly.
    with io.open('hashtags.csv', 'r', encoding='utf-8', newline='') as f:
        # NUL characters are stripped from each line before parsing
        # (presumably because csv.reader can reject lines containing them).
        reader = csv.reader(line.replace('\0', '') for line in f)
        # Blank lines come back from csv.reader as empty rows; skip them
        # rather than failing with IndexError on row[0].
        hashtag_text = ','.join(row[0] for row in reader if row)

    # generate wordcloud
    create_wordcloud(hashtag_text)
Share: Facebook · Twitter · Google+ · Reddit · Pinterest · LinkedIn · Mail

Leave a Reply

Your email address will not be published. Required fields are marked *