-
Notifications
You must be signed in to change notification settings - Fork 69
/
Copy pathupdate_posts.yaml.py
104 lines (90 loc) · 5.2 KB
/
update_posts.yaml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
'''
Update the posts.yaml file with LinkedIn posts exported with Apify.
'''
import json
import re
import jinja2
# Jinja2 template that renders the list passed as `posts` into a YAML
# sequence, one `- id: ...` entry per post. Each post is expected to expose
# the keys built by format_post() (id, type, title, author, date, url, likes,
# reshares, impressions, comments, article_title, article_url, images,
# image_url, text).
# `title`, `article_title` and `text` are emitted as YAML literal block
# scalars (`|`); `text` is additionally passed through Jinja's `indent`
# filter with width=4 so that its continuation lines are indented.
# `date` is the only value wrapped in double quotes.
# NOTE(review): the nesting of the generated YAML depends entirely on the
# leading whitespace inside this literal -- confirm it is intact.
YAML_TEMPLATE = jinja2.Template('''
{% for post in posts %}
- id: {{ post.id }}
type: {{ post.type }}
title: |
{{ post.title }}
author: {{ post.author }}
date: "{{ post.date }}"
url: {{ post.url }}
likes: {{ post.likes }}
reshares: {{ post.reshares }}
impressions: {{ post.impressions }}
comments: {{ post.comments }}
article_title: |
{{ post.article_title }}
article_url: {{ post.article_url }}
images: {{ post.images }}
image_url: {{ post.image_url }}
text: |
{{ post.text | indent(width=4) }}
{% endfor %}
''')
# Path of the JSON export that the script reads its posts from.
LINKEDIN_POSTS_FILENAME = '2025-01-30_linkedin_posts.json'
def _styled_range(first_code_point, plain_chars):
    """Map consecutive code points starting at *first_code_point* to *plain_chars*."""
    return {
        first_code_point + offset: plain
        for offset, plain in enumerate(plain_chars)
    }


def _build_styled_to_plain():
    """Build the ``str.translate`` table that folds styled glyphs to ASCII.

    The table is expressed with code points and escapes instead of literal
    glyphs so that it survives any re-encoding of this source file.
    """
    lower = 'abcdefghijklmnopqrstuvwxyz'
    upper = lower.upper()
    digits = '0123456789'

    table = {}

    # Mathematical bold (serif): A-Z, a-z and 0-9 are contiguous ranges.
    table.update(_styled_range(0x1D400, upper))
    table.update(_styled_range(0x1D41A, lower))
    table.update(_styled_range(0x1D7CE, digits))

    # Mathematical sans-serif italic: A-Z and a-z (the style has no digits).
    # Serif mathematical italic is deliberately not mapped; the earlier
    # hand-written tables had that alphabet commented out.
    table.update(_styled_range(0x1D608, upper))
    table.update(_styled_range(0x1D622, lower))

    # Small capitals are scattered over several Unicode blocks, so they are
    # listed one by one. 'q' and 'x' have no mapped small-capital form.
    table.update(str.maketrans(
        '\u1d00\u0299\u1d04\u1d05\u1d07\ua730\u0262\u029c\u026a\u1d0a\u1d0b'
        '\u029f\u1d0d\u0274\u1d0f\u1d18\u0280\ua731\u1d1b\u1d1c\u1d20\u1d21'
        '\u028f\u1d22',
        'abcdefghijklmnoprstuvwyz',
    ))

    # Decorative punctuation: exclamation / question mark ornaments (both the
    # heavy and the white form) and the "small form variant" punctuation.
    table.update(str.maketrans(
        '\u2757\u2755\u2753\u2754\ufe57\ufe56\ufe52\ufe50',
        '!!??!?.,',
    ))
    return table


# Built once at import time; clean_letters() only performs the lookup.
_STYLED_TO_PLAIN = _build_styled_to_plain()


def clean_letters(text):
    """Replace "fancy" Unicode letters, digits and punctuation with ASCII.

    Folds mathematical bold, mathematical sans-serif italic, small-capital
    letters and a few decorative punctuation marks to their plain ASCII
    equivalents. Every other character is returned unchanged.

    Args:
        text: The string to normalise.

    Returns:
        A new string with the styled characters replaced one-for-one.
    """
    return text.translate(_STYLED_TO_PLAIN)
def format_post(post):
    """Flatten one exported LinkedIn post into the dict rendered to YAML.

    The first line of the cleaned text becomes the title and the remainder
    becomes the body. The id is the posting date followed by a slug made from
    the title. Optional values that are missing are returned as the string
    'null'.

    Args:
        post: Mapping for one exported post. The keys 'postedAtISO', 'text',
            'type', 'authorName', 'url', 'numLikes', 'numShares' and
            'numComments' are required; 'article', 'images' and
            'numImpressions' are optional.

    Returns:
        A dict with the keys type, id, title, text, article_title,
        article_url, images, image_url, author, date, url, likes, reshares,
        impressions and comments.

    Raises:
        KeyError: If one of the required keys is absent from *post*.
    """
    date = post['postedAtISO'][:10]  # keep only the YYYY-MM-DD prefix

    # Normalise styled glyphs, then drop the '#' characters from the text.
    cleaned = clean_letters(post['text']).strip().replace('#', '')
    first_line, _, remainder = cleaned.partition('\n')
    title = first_line.strip()
    text = remainder.strip()

    # Slug: lowercase title restricted to [0-9a-z ], spaces -> '_', max 50.
    slug = re.sub(r'[^0-9a-z ]+', '', title.lower()).replace(' ', '_')[:50]
    post_id = date + '_' + slug

    # `article` may be absent or explicitly null in the export.
    article = post.get('article') or {}
    impressions = post.get('numImpressions')

    return {
        'type': post['type'],
        'id': post_id,
        'title': title,
        'text': text,
        'article_title': article.get('title') or 'null',
        'article_url': article.get('url') or 'null',
        # A null `images` value is treated like a missing one.
        'images': post.get('images') or [],
        'image_url': '../assets/blog/' + post_id if 'images' in post else 'null',
        'author': post['authorName'],
        'date': date,
        'url': post['url'],
        'likes': post['numLikes'],
        'reshares': post['numShares'],
        # Only a missing value becomes 'null'; a genuine 0 is kept.
        'impressions': 'null' if impressions is None else impressions,
        'comments': post['numComments'],
    }
def str_presenter(dumper, data):
    """
    Represent a string for a YAML dumper, using literal block style ("|")
    for multiline values so their line breaks are preserved.
    https://github.com/yaml/pyyaml/issues/240

    Single-line strings are passed to ``dumper.represent_scalar`` untouched.
    """
    yaml_str_tag = "tag:yaml.org,2002:str"

    if "\n" not in data:
        return dumper.represent_scalar(yaml_str_tag, data)

    # Strip trailing whitespace from every line before emitting the block.
    cleaned = "\n".join(line.rstrip() for line in data.splitlines())
    # splitlines() drops a final newline, so put it back when there was one.
    if data.endswith("\n"):
        cleaned += "\n"
    return dumper.represent_scalar(yaml_str_tag, cleaned, style="|")
# Load the exported posts; the context manager closes the file right away.
with open(LINKEDIN_POSTS_FILENAME, encoding='utf-8') as export_file:
    posts = json.load(export_file)

# Keep only records that carry a type and a text and were posted after the
# cutoff. ISO timestamps sort lexicographically, so a plain string comparison
# against '2022-09' is enough. Records without a timestamp are skipped, the
# same way records without 'type' or 'text' are.
posts = [
    format_post(post)
    for post in posts
    if (
        'type' in post and
        'text' in post and
        (post.get('postedAtISO') or '') > '2022-09'
    )
]

# Render every kept post through the template and write the result out.
content = YAML_TEMPLATE.render(posts=posts)
with open('posts.yaml', 'w', encoding='utf-8') as output_file:
    output_file.write(content)