~fkfd/one_top_song

one_top_song/data/one_song_words.py -rw-r--r-- 746 bytes
26ca7fbcFrederick Yin Remove "ripped" (Redecorate) 10 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os

# list all tracks: one entry per "<track>.txt" file found in the "all" directory
# Only the trailing ".txt" is cut off. str.replace() would also delete any
# ".txt" occurring inside the name, and the file could then not be reopened
# below under its real name.
tracks = [fn[: -len(".txt")] for fn in os.listdir("all") if fn.endswith(".txt")]
# maps track name -> set of normalized words found in that track's file
lyrics = {}

for track in tracks:
    # the context manager closes the file even if reading it raises
    with open(f"all/{track}.txt") as f:
        # find every discrete word, deduped and normalized with best effort:
        # strip surrounding punctuation, drop "'s" and "'d", then lowercase
        lyrics[track] = {
            word.strip(",.?!'\"():").replace("'s", "").replace("'d", "").lower()
            for word in f.read().split()
        }

# Write one "<track>\t<word>" line to the "results" file for every word of a
# track that no other track contains. The context manager makes sure the file
# is flushed and closed even if a lookup or write raises part-way through.
with open("results", "w") as rf:
    for track in tracks:
        other_tracks = [t for t in tracks if t != track]
        for word in lyrics[track]:
            # if word does not appear in any other track
            # (generator form stops at the first track that has the word)
            if not any(word in lyrics[o] for o in other_tracks):
                rf.write(f"{track}\t{word}\n")