From f479557cceecc0e12310c98ddb1116380dac8081 Mon Sep 17 00:00:00 2001 From: Anshul Date: Mon, 1 Mar 2021 11:05:14 +0530 Subject: [PATCH 01/16] Convert audio to video Using moviepy --- audio2video.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 audio2video.py diff --git a/audio2video.py b/audio2video.py new file mode 100644 index 0000000..9df8bda --- /dev/null +++ b/audio2video.py @@ -0,0 +1,10 @@ +from moviepy.editor import * + +# Import the audio(Insert to location of your audio instead of audioClip.mp3) +audio = AudioFileClip(r"C:\Users\91998\Documents\abc.mp3") +# Import the Image and set its duration same as the audio (Insert the location of your photo instead of photo.jpg) +clip = ImageClip(r"C:\Users\91998\Documents\abc.jpg").set_duration(audio.duration) +# Set the audio of the clip +clip = clip.set_audio(audio) +# Export the clip +clip.write_videofile(r"C:\Users\91998\Documents\abc.mp4", fps=0.1) From 9d98b5a6ddd9139f97e70151de92d30056e7eb5b Mon Sep 17 00:00:00 2001 From: Anshul Date: Mon, 1 Mar 2021 11:07:35 +0530 Subject: [PATCH 02/16] Added audio to video in Readme. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 814d249..37f6cbb 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,5 @@ Some useful python code snippets and utilities. [Snap7](https://github.com/anshulkhare7/PythonUtils/blob/master/snap7) - Snap7 is a python library to connect to Siemens PLCs. [Merge PDF](https://github.com/anshulkhare7/PythonUtils/blob/master/mergePDF.py) - Merge PDF files using PyPDF2 package. Install with `pip install pypdf2`. Based on [this.](https://realpython.com/pdf-python/). Requires python 3.8. + +[Convert Audio to Video](https://github.com/anshulkhare7/PythonUtils/blob/master/audio2video.py) - Convert audio file to video file. From ba29c25c77cafc4b109f7e599df880544ca947f0 Mon Sep 17 00:00:00 2001 From: Anshul Date: Wed, 10 Mar 2021 13:58:26 +0530 Subject: [PATCH 03/16] joinVideo added --- audio2video.py | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/audio2video.py b/audio2video.py index 9df8bda..a5cf795 100644 --- a/audio2video.py +++ b/audio2video.py @@ -1,10 +1,37 @@ from moviepy.editor import * +from os import listdir +from os.path import isfile, join -# Import the audio(Insert to location of your audio instead of audioClip.mp3) -audio = AudioFileClip(r"C:\Users\91998\Documents\abc.mp3") -# Import the Image and set its duration same as the audio (Insert the location of your photo instead of photo.jpg) -clip = ImageClip(r"C:\Users\91998\Documents\abc.jpg").set_duration(audio.duration) -# Set the audio of the clip -clip = clip.set_audio(audio) -# Export the clip -clip.write_videofile(r"C:\Users\91998\Documents\abc.mp4", fps=0.1) +def convert(inDir, imagePath, outDir): + + onlyfiles = [f for f in listdir(inDir) if isfile(join(inDir, f))] + + for mp3File in onlyfiles: + if(mp3File.endswith(".mp3")): + filePath = inDir + mp3File + audio = AudioFileClip(filePath) + clip = ImageClip(imagePath).set_duration(audio.duration) + fileName = outDir + os.path.splitext(mp3File)[0] + '.mp4' + clip = clip.set_audio(audio) + clip.write_videofile(fileName, fps=0.1) + +def joinVideo(inputdirectory): + + onlyfiles = [f for f in listdir(inputdirectory) if isfile(join(inputdirectory, f))] + + clips = [] + for videofile in onlyfiles: + if(videofile.endswith(".mp4")): + filepath = inputdirectory + videofile + clip = VideoFileClip(filepath) + clips.append(clip) + + final_clip = concatenate_videoclips(clips) + final_clip.write_videofile(inputdirectory+'joined-video.mp4') + +if __name__=="__main__": + inDir = r"C:\Users\91998\Documents\\" + imagePath = r"C:\Users\91998\Documents\myimage.jpg" + outDir = r"C:\Users\91998\Documents\\" + convert(inDir, imagePath, outDir) + joinVideo(outDir) From be2d695ec4f120b3237e22f1eaa69a3180e53fbd Mon Sep 17 00:00:00 2001 From: Anshul Date: Wed, 10 Mar 2021 13:59:01 +0530 Subject: [PATCH 04/16] Update audio2video.py --- audio2video.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/audio2video.py b/audio2video.py index a5cf795..7d53891 100644 --- a/audio2video.py +++ b/audio2video.py @@ -30,8 +30,8 @@ def joinVideo(inputdirectory): final_clip.write_videofile(inputdirectory+'joined-video.mp4') if __name__=="__main__": - inDir = r"C:\Users\91998\Documents\\" - imagePath = r"C:\Users\91998\Documents\myimage.jpg" - outDir = r"C:\Users\91998\Documents\\" + inDir = r"C:\Users\anshul\Documents\\" + imagePath = r"C:\Users\anshul\Documents\myimage.jpg" + outDir = r"C:\Users\anshul\Documents\\" convert(inDir, imagePath, outDir) joinVideo(outDir) From 0f0d318cb6c1afd9eabea350efc0e03a707ba8bd Mon Sep 17 00:00:00 2001 From: Anshul Date: Thu, 1 Apr 2021 10:29:30 +0530 Subject: [PATCH 05/16] IPCam Signed-off-by: Anshul --- ipcam.py | 53 +++++++++++++++++++++++++++++++++++++++ removeSilenceFromVideo.py | 2 +- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 ipcam.py diff --git a/ipcam.py b/ipcam.py new file mode 100644 index 0000000..17f7dae --- /dev/null +++ b/ipcam.py @@ -0,0 +1,53 @@ +import cv2, base64, requests, json, os, re, argparse +import numpy as np +from time import sleep + +def main(camIp, camName): + print(camIp) + print(camName) + cap = cv2.VideoCapture("rtsp://"+camIp) + #url = 'http://localhost:8888/image/base64/' + url = 'http://one.innocule.tech/image/base64/' + + while(cap.isOpened()): + retval, image = cap.read() + _, im_arr = cv2.imencode('.jpg', image) + jpg_as_text = base64.b64encode(np.array(im_arr).tostring()).decode('utf-8') + #print(jpg_as_text[0:50]) + data = { 'image' : str(jpg_as_text), 'source' : camName} + headers = {'Content-type': 'application/json', 'Authorization':'Basic YWRtaW46SW5ub2NAMTIz'} + response = requests.post(url, json = data, headers = headers) + print(response.text) + break + + cap.release() + cv2.destroyAllWindows() + +if __name__=="__main__": + # construct the argument parser and parse the arguments + ap = argparse.ArgumentParser() + ap.add_argument("-i", "--cameraip", required=True, type=str, help="IP Camera host") + ap.add_argument("-n", "--cameraname", type=str, required=True, help="Camera identifier name") + args = vars(ap.parse_args()) + + camIP = args["cameraip"] + camName = args["cameraname"] + + main(camIP, camName) + +# while(cap.isOpened()): +# ret, frame = cap.read() +# cv2.imshow('frame', frame) +# if cv2.waitKey(20) & 0xFF == ord('q'): +# break +# cap.release() +# cv2.destroyAllWindows() +#im_bytes = im_arr.tobytes() +# mydata = np.fromstring(image, dtype=np.uint8) +# cv2.imwrite("abc.jpg", cap.read()[1]) +#from onvif import ONVIFCamera +#mycam = ONVIFCamera('192.168.1.64', 80, 'anshul', 'innocule7', '/etc/onvif/python-onvif/wsdl') + +# Get Hostname +#resp = mycam.devicemgmt.GetHostname() +#print 'My camera`s hostname: ' + str(resp.Name) \ No newline at end of file diff --git a/removeSilenceFromVideo.py b/removeSilenceFromVideo.py index ff4cc9c..b9b65f6 100644 --- a/removeSilenceFromVideo.py +++ b/removeSilenceFromVideo.py @@ -3,4 +3,4 @@ video = Video() -video.humanly_remove_silence_parts_from_video("/ap.mp4", "/ap-silence.mp4", 25, minimum_interval=0.7) \ No newline at end of file +video.humanly_remove_silence_parts_from_video("/ap.mp4", "/ap-silence.mp4", 25, minimum_interval=0.7) \ No newline at end of file From f6affcb3da8feadf928115378aca2b1029cbd7f0 Mon Sep 17 00:00:00 2001 From: Anshul Khare Date: Thu, 15 Apr 2021 13:33:08 +0530 Subject: [PATCH 06/16] gpio manual for testin --- gpio.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 gpio.py diff --git a/gpio.py b/gpio.py new file mode 100644 index 0000000..471f457 --- /dev/null +++ b/gpio.py @@ -0,0 +1,35 @@ +import RPi.GPIO as GPIO +import argparse +from time import sleep + +def main(period): + + #Assign Pins + blower_pin_1=40 + blower_pin_2=38 + feeder_pin_1=37 + unused_board_pins = (3, 5, 7, 8, 10, 11, 12, 13, 15, 16, 18, 19, 21, 22, 23, 24, 26, 29, 31, 32, 33, 35, 36, 37, 38) + relay_pins = (blower_pin_1, blower_pin_2, feeder_pin_1) + + #Setup GPIO + GPIO.setmode(GPIO.BOARD) + GPIO.setwarnings(False) + GPIO.setup(unused_board_pins, GPIO.OUT, initial=True) + GPIO.setup(relay_pins, GPIO.OUT, initial=True) + + #Turn on PINs + GPIO.output(blower_pin_1, False) + GPIO.output(blower_pin_2, False) + + sleep(int(period)) + + #Turn off PINs + GPIO.output(blower_pin_1, True) + GPIO.output(blower_pin_2, True) + +if __name__=="__main__": + ap = argparse.ArgumentParser() + ap.add_argument("-p", "--period", required=True, type=str, help="Time in seconds to keep the pin On") + args = vars(ap.parse_args()) + period = args["period"] + main(period) From f2ebf31f68d39f7a9e6e514d123109885cf2d9ee Mon Sep 17 00:00:00 2001 From: Anshul Date: Fri, 20 Sep 2024 16:38:15 +0530 Subject: [PATCH 07/16] restructure and add youtube util --- .gitignore | 3 + YT/playlistInfo.py | 60 +++++++++++ YT/playlist_titles.txt | 47 ++++++++ convert/generatePDF.py | 35 ++++++ mergePDF.py => convert/mergePDF.py | 31 ++++-- convert/onlineBookToPDF.py | 99 +++++++++++++++++ data-transform/main.py | 101 ++++++++++++++++++ data-transform/output.csv | 75 +++++++++++++ .../pointInPolygon.py | 0 download/downloadBuffettLetters.py | 38 +++++++ .../downloadYoutube.py | 4 +- whatsapp.py => download/whatsapp.py | 0 ipcam.py | 53 --------- gpio.py => raspi/gpio.py | 0 audio2video.py => video/audio2video.py | 0 checkFPS.py => video/checkFPS.py | 0 extractFrames.py => video/extractFrames.py | 0 video/ipcam.py | 54 ++++++++++ joinSplitVideo.py => video/joinSplitVideo.py | 8 +- video/joinVideos.py | 12 +++ .../removeSilenceFromVideo.py | 0 .../videoStreamingRaspi.py | 0 22 files changed, 552 insertions(+), 68 deletions(-) create mode 100644 YT/playlistInfo.py create mode 100644 YT/playlist_titles.txt create mode 100644 convert/generatePDF.py rename mergePDF.py => convert/mergePDF.py (64%) create mode 100644 convert/onlineBookToPDF.py create mode 100644 data-transform/main.py create mode 100644 data-transform/output.csv rename pointInPolygon.py => data-transform/pointInPolygon.py (100%) create mode 100644 download/downloadBuffettLetters.py rename downloadYoutube.py => download/downloadYoutube.py (56%) rename whatsapp.py => download/whatsapp.py (100%) delete mode 100644 ipcam.py rename gpio.py => raspi/gpio.py (100%) rename audio2video.py => video/audio2video.py (100%) rename checkFPS.py => video/checkFPS.py (100%) rename extractFrames.py => video/extractFrames.py (100%) create mode 100644 video/ipcam.py rename joinSplitVideo.py => video/joinSplitVideo.py (89%) create mode 100644 video/joinVideos.py rename removeSilenceFromVideo.py => video/removeSilenceFromVideo.py (100%) rename videoStreamingRaspi.py => video/videoStreamingRaspi.py (100%) diff --git a/.gitignore b/.gitignore index 4236406..2383f01 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +.DS_Store +.idea/* + # media files *.mp4 *.pdf diff --git a/YT/playlistInfo.py b/YT/playlistInfo.py new file mode 100644 index 0000000..0fab295 --- /dev/null +++ b/YT/playlistInfo.py @@ -0,0 +1,60 @@ +import os +import sys + +from googleapiclient.discovery import build + +# Read the API key from environment variable +api_key = os.environ.get('YOUTUBE_API_KEY') + +if not api_key: + raise ValueError("Please set the YOUTUBE_API_KEY environment variable") + +# Set up the YouTube API client +youtube = build('youtube', 'v3', developerKey=api_key) + +def get_playlist_titles(playlist_id): + titles = [] + next_page_token = None + + while True: + # Make the API request + request = youtube.playlistItems().list( + part='snippet', + playlistId=playlist_id, + maxResults=50, + pageToken=next_page_token + ) + response = request.execute() + + # Extract video titles + for item in response['items']: + titles.append(item['snippet']['title']) + + # Check if there are more pages + next_page_token = response.get('nextPageToken') + if not next_page_token: + break + + return titles + +def main(): + if len(sys.argv) != 2: + print("Usage: python script_name.py ") + sys.exit(1) + + playlist_id = sys.argv[1] + video_titles = get_playlist_titles(playlist_id) + + # Print the titles + for title in video_titles: + print(title) + + # Save titles to a file + with open('playlist_titles.txt', 'w', encoding='utf-8') as f: + for title in video_titles: + f.write(f"{title}\n") + + print(f"\nTitles have been saved to playlist_titles.txt") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/YT/playlist_titles.txt b/YT/playlist_titles.txt new file mode 100644 index 0000000..ded66ad --- /dev/null +++ b/YT/playlist_titles.txt @@ -0,0 +1,47 @@ +Leander Paes on the Power of One Percent, and Dealing with Triumphs and Disasters in Tennis and Life +Kenneth Andrade on Spotting Great Businesses, Market Cycles, and Playing the Long Game of Investing +Bogumil Baranowski on Building Generational Wealth and Playing the Infinite Game of Investing +Navneet Munot on India’s Amrit Kaal, Role of Luck, Building Resilience, and the Power of Kindness +Shyam Sekhar on Mastering the Long Game of #Investing +Rohit Chauhan on Difficulties of Compounding, Life’s True Riches, and Power of Investing Simplicity +Swanand Kelkar on the Purpose of Life, Dealing with Boring Nothingness, and Playing the Long Game +Nilesh Shah on Transformation of India, Lifelong Learning, and Importance of Humility in Investing +Investing is all about Probability - Morgan Housel - The One Percent Show #shorts +Life is about Leaps - Vinod Sethi - The One Percent Show #shorts +The Only Purpose of Building Wealth - Morgan Housel - The One Percent Show #shorts +Writing Looks Easier Than It is - Morgan Housel - The One Percent Show #Shorts +Mohnish Pabrai on Playing Your Own Investing Game #shorts +Annie Duke on Why Life is Like Poker, Anatomy of Decision Making, and the Power of Quitting +Guy Spier on the India Opportunity, Long Term Investing, and the Power of Checklists +Vitaliy Katsenelson on Thoughtful Arrogance, Stoicism, and Having Soul in the Game +PD Mangan on Reverse Ageing, Diet and Exercise, and Solution to the Epidemic of Chronic Diseases +Kalpen Parekh on Financial Freedom, Universal Truths of Investing, and the Idea of Lifelong Learning +William Green on Charlie Munger, Good Parenting, and Secrets of a Happy and Abundant Life +Nick Maggiulli on Biggest Lies in Personal Finance, Feeling Rich, and the Role of Luck in Investing +Ian Cassel on Microcap Investing, Dealing with Self-Doubt, and the Secret of Happiness +Is Investing a Game of Skill or Luck? +Kuntal Shah on Market Cycles, Lessons from Financial History, and Twelve Equations of Life +Guy Spier on Warren Buffett's Annual Meeting and the Power of Your Social Network +Rajeev Thakkar on Investing Character, Lifelong Learning, and Building an Antifragile Life +India of 2050 - Vinod Sethi on The One Percent Show with Vishal Khandelwal +Guy Spier on the Powerful Idea of Writing Thank You Notes - The One Percent Show +Guy Spier on Finding Your North Star and the Power of Compounding Goodwill - The One Percent Show +India is Hope for Humanity - Guy Spier - The One Percent Show +The Cycle of Life - Manish Chokhani - The One Percent Show #Shorts +Two Kinds of People - Arnold Van Den Berg - The One Percent Show #Shorts +Arnold Van Den Berg on the Power of Your Subconscious Mind +The One Percent Show - Intro +Ramesh Damani on Focus, Backing Up Your Truck, and Lessons from the Wizards of Dalal Street +Sankaran Naren on Market Cycles, Common Sense, and Art of Being a Contrarian Investor +Monika Halan on Money Mantras, Biggest Financial Regrets, and Handling Life's Real Wealth Well +A Lesson on Kindness - Barry Ritholtz - The One Percent Show with Vishal Khandelwal +Barry Ritholtz on Survivorship Bias, Thinking Independently, and Creating Meaning in Life +Samit Vartak on Success vs Happiness and the Art of Managing Other People's Money +Harsh Mariwala on Risk-Taking, Dealing with Failures, and Building a Sound Corporate Culture +Saurabh Mukherjea on Compounding, Wealth Creation, and Seizing the India Opportunity +Sanjay Bakshi on Teaching, Spirituality, and Dealing with Uncertainties in Life and Investing +Morgan Housel on Seeking Simplicity in a Complex World - The One Percent Show +Mohnish Pabrai on Learning from Mistakes and Reinventing Yourself - The One Percent Show +Radhika Gupta on Living a Brave Life and Creating Your Own Destiny +The One Percent Show with Vishal Khandelwal - Ep. 2 - Vinod Sethi +The One Percent Show with Vishal Khandelwal - Ep. 1 - Manish Chokhani diff --git a/convert/generatePDF.py b/convert/generatePDF.py new file mode 100644 index 0000000..3eba208 --- /dev/null +++ b/convert/generatePDF.py @@ -0,0 +1,35 @@ +import os +import random +from fpdf import FPDF + +def generate_pdf(file_path, size_kb): + pdf = FPDF() + pdf.add_page() + pdf.set_font('Arial', size=12) + # Add some text content to the PDF + pdf.cell(200, 10, txt="Sample PDF content", ln=True, align='C') + + # Save the PDF file + pdf.output(file_path) + + # Adjust the file size + with open(file_path, 'ab') as f: + current_size = os.path.getsize(file_path) + target_size = size_kb * 1024 + if current_size < target_size: + # Add padding to reach the target size + f.write(b'0' * (target_size - current_size)) + +def generate_sample_pdfs(prefix, min_size_kb, max_size_kb, num_files=100, folder="output"): + # Create the folder if it doesn't exist + os.makedirs(folder, exist_ok=True) + + for i in range(num_files): + size_kb = random.randint(min_size_kb, max_size_kb) + file_name = f"{prefix}_{size_kb}KB_{i+1}.pdf" + file_path = os.path.join(folder, file_name) + generate_pdf(file_path, size_kb) + print(f"Generated: {file_path} of size {size_kb} KB") + +# Example usage: Specify folder where the files will be created +generate_sample_pdfs(prefix="sample", min_size_kb=100, max_size_kb=1024, num_files=25, folder="/Users/anshul/tmp/s3") \ No newline at end of file diff --git a/mergePDF.py b/convert/mergePDF.py similarity index 64% rename from mergePDF.py rename to convert/mergePDF.py index c64029e..4c78890 100755 --- a/mergePDF.py +++ b/convert/mergePDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from PyPDF2 import PdfFileReader, PdfFileWriter -import pikepdf +#import pikepdf def remove_page(path, output, page_to_Remove): pdf_writer = PdfFileWriter() @@ -70,13 +70,24 @@ def decrypt_pdf2(input_path, output_path, password): print("Total pages:", num_pages) if __name__ == '__main__': - doc_1 = '/Users/anshul/Downloads/input01.pdf' - doc_2 = '/Users/anshul/Downloads/input02.pdf' + doc_1 = '/Users/anshul/Documents/docs/E-Aadhar-password-ANSH1981.pdf' + doc_2 = '/Users/anshul/Documents/docs/E-Aadhar-password-ANSH1981-dec.pdf' + # doc_3 = '/Users/anshul/Downloads/Shruti-Quantum-9991336776.pdf' + # doc_4 = '/Users/anshul/Downloads/Shruti-Quantum-9991353164.pdf' + # doc_5 = '/Users/anshul/Downloads/page-05.pdf' + # doc_6 = '/Users/anshul/Downloads/page-06.pdf' + # doc_7 = '/Users/anshul/Downloads/page-07.pdf' + # doc_8 = '/Users/anshul/Downloads/page-08.pdf' + # doc_9 = '/Users/anshul/Downloads/page-09.pdf' + # doc_10 = '/Users/anshul/Downloads/page-10.pdf' + #extract_information('/Users/anshul/Downloads/abc.pdf') - paths = [doc_1, doc_2] - merge_pdfs(paths, output='/Users/anshul/Downloads/output.pdf') - #remove_page(doc_1,doc_2,1) - #encrypted = '/Users/anshul/Downloads/abc.pdf' - #decrypted = '/Users/anshul/Downloads/abc.pdf' - ##password = 'password' - #decrypt_pdf(encrypted, decrypted, password) + # paths = [doc_1, doc_2, doc_3, doc_4, doc_5, doc_6, doc_7, doc_8, doc_9, doc_10 ] + # paths = [doc_1, doc_2, doc_3] + # merge_pdfs(paths, output='/Users/anshul/Downloads/life-certificate-2021.pdf') + # remove_page(doc_1,doc_2,2) + # encrypted = '/Users/anshul/Downloads/ELSS-Statement-encrypted.pdf' + # decrypted = '/Users/anshul/Downloads/ELSS-Statement.pdf' + # password = 'AMYPK6172H' + decrypt_pdf(doc_1, doc_2, "ANSH1981") + # decrypt_pdf(doc_2, doc_4, "BLXPS4482F") diff --git a/convert/onlineBookToPDF.py b/convert/onlineBookToPDF.py new file mode 100644 index 0000000..8d4babc --- /dev/null +++ b/convert/onlineBookToPDF.py @@ -0,0 +1,99 @@ +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin, urlparse +from reportlab.lib.pagesizes import letter +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image +from reportlab.lib.styles import getSampleStyleSheet +from io import BytesIO +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +import networkx as nx + +class WebsiteToPDFConverter: + def __init__(self, base_url): + self.base_url = base_url + self.domain = urlparse(base_url).netloc + self.graph = nx.DiGraph() + self.content = [] + self.styles = getSampleStyleSheet() + + # Set up Selenium WebDriver + chrome_options = Options() + chrome_options.add_argument("--headless") + self.driver = webdriver.Chrome(options=chrome_options) + + def get_soup(self, url): + self.driver.get(url) + WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + return BeautifulSoup(self.driver.page_source, 'html.parser') + + def extract_content(self, url, soup): + main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content') + if main_content: + for element in main_content.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']): + if element.name.startswith('h'): + self.content.append(Paragraph(element.text, self.styles[element.name])) + elif element.name == 'p': + self.content.append(Paragraph(element.text, self.styles['Normal'])) + elif element.name == 'img': + img_url = urljoin(url, element['src']) + try: + img_data = BytesIO(requests.get(img_url).content) + img = Image(img_data, width=300, height=200) + self.content.append(img) + except Exception as e: + print(f"Error processing image {img_url}: {e}") + self.content.append(Spacer(1, 12)) + + def build_site_graph(self, url, parent=None): + if url in self.graph: + return + + print(f"Mapping: {url}") + self.graph.add_node(url) + if parent: + self.graph.add_edge(parent, url) + + soup = self.get_soup(url) + nav_menu = soup.find('nav') or soup.find('ul', class_='menu') + + if nav_menu: + for link in nav_menu.find_all('a', href=True): + next_url = urljoin(url, link['href']) + if next_url.startswith(self.base_url) and self.domain in next_url: + self.build_site_graph(next_url, url) + + def process_site(self): + self.build_site_graph(self.base_url) + + for url in nx.dfs_preorder_nodes(self.graph, self.base_url): + print(f"Processing: {url}") + soup = self.get_soup(url) + self.extract_content(url, soup) + + def create_pdf(self, output_filename): + doc = SimpleDocTemplate(output_filename, pagesize=letter) + doc.build(self.content) + + def cleanup(self): + self.driver.quit() + +def main(): + base_url = input("Enter the base URL of the website: ") + output_filename = input("Enter the output PDF filename: ") + + converter = WebsiteToPDFConverter(base_url) + try: + converter.process_site() + converter.create_pdf(output_filename) + print(f"PDF created: {output_filename}") + finally: + converter.cleanup() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/data-transform/main.py b/data-transform/main.py new file mode 100644 index 0000000..df82fa5 --- /dev/null +++ b/data-transform/main.py @@ -0,0 +1,101 @@ +import csv +import json + +# Your JSON data +json_data = '{"list":[{"shortId":"aplive","platform":14,"platformAccount":0,"platformSection":17},{"shortId":"m00001","platform":5,"platformAccount":0,"platformSection":0},{"shortId":"m00002","platform":6,"platformAccount":0,"platformSection":0},{"shortId":"m00003","platform":12,"platformAccount":0,"platformSection":0},{"shortId":"m00004","platform":10,"platformAccount":19,"platformSection":16},{"shortId":"m00005","platform":10,"platformAccount":20,"platformSection":16},{"shortId":"m00006","platform":11,"platformAccount":0,"platformSection":0},{"shortId":"m00007","platform":2,"platformAccount":9,"platformSection":6},{"shortId":"m00009","platform":2,"platformAccount":9,"platformSection":8},{"shortId":"m00010","platform":2,"platformAccount":10,"platformSection":6},{"shortId":"m00012","platform":2,"platformAccount":10,"platformSection":8},{"shortId":"m00013","platform":4,"platformAccount":13,"platformSection":11},{"shortId":"m00014","platform":4,"platformAccount":13,"platformSection":12},{"shortId":"m00015","platform":4,"platformAccount":14,"platformSection":11},{"shortId":"m00016","platform":4,"platformAccount":14,"platformSection":12},{"shortId":"m00017","platform":3,"platformAccount":11,"platformSection":9},{"shortId":"m00018","platform":3,"platformAccount":11,"platformSection":10},{"shortId":"m00019","platform":3,"platformAccount":12,"platformSection":9},{"shortId":"m00020","platform":3,"platformAccount":12,"platformSection":10},{"shortId":"m00021","platform":1,"platformAccount":1,"platformSection":1},{"shortId":"m00022","platform":1,"platformAccount":1,"platformSection":2},{"shortId":"m00023","platform":1,"platformAccount":1,"platformSection":3},{"shortId":"m00024","platform":1,"platformAccount":1,"platformSection":4},{"shortId":"m00025","platform":1,"platformAccount":1,"platformSection":5},{"shortId":"m00026","platform":1,"platformAccount":2,"platformSection":1},{"shortId":"m00027","platform":1,"platformAccount":2,"platformSection":2},{"shortId":"m00028","platform":1,"platformAccount":2,"platformSection":3},{"shortId":"m00029","platform":1,"platformAccount":2,"platformSection":4},{"shortId":"m00030","platform":1,"platformAccount":2,"platformSection":5},{"shortId":"m00031","platform":1,"platformAccount":3,"platformSection":1},{"shortId":"m00032","platform":1,"platformAccount":3,"platformSection":2},{"shortId":"m00033","platform":1,"platformAccount":3,"platformSection":3},{"shortId":"m00034","platform":1,"platformAccount":3,"platformSection":4},{"shortId":"m00035","platform":1,"platformAccount":3,"platformSection":5},{"shortId":"m00036","platform":1,"platformAccount":4,"platformSection":1},{"shortId":"m00037","platform":1,"platformAccount":4,"platformSection":2},{"shortId":"m00038","platform":1,"platformAccount":4,"platformSection":3},{"shortId":"m00039","platform":1,"platformAccount":4,"platformSection":4},{"shortId":"m00040","platform":1,"platformAccount":4,"platformSection":5},{"shortId":"m00041","platform":1,"platformAccount":5,"platformSection":1},{"shortId":"m00042","platform":1,"platformAccount":5,"platformSection":2},{"shortId":"m00043","platform":1,"platformAccount":5,"platformSection":3},{"shortId":"m00044","platform":1,"platformAccount":5,"platformSection":4},{"shortId":"m00045","platform":1,"platformAccount":5,"platformSection":5},{"shortId":"m00046","platform":1,"platformAccount":6,"platformSection":1},{"shortId":"m00047","platform":1,"platformAccount":6,"platformSection":2},{"shortId":"m00048","platform":1,"platformAccount":6,"platformSection":3},{"shortId":"m00049","platform":1,"platformAccount":6,"platformSection":4},{"shortId":"m00050","platform":1,"platformAccount":6,"platformSection":5},{"shortId":"m00051","platform":1,"platformAccount":7,"platformSection":1},{"shortId":"m00052","platform":1,"platformAccount":7,"platformSection":2},{"shortId":"m00053","platform":1,"platformAccount":7,"platformSection":3},{"shortId":"m00054","platform":1,"platformAccount":7,"platformSection":4},{"shortId":"m00055","platform":1,"platformAccount":7,"platformSection":5},{"shortId":"m00056","platform":1,"platformAccount":8,"platformSection":1},{"shortId":"m00057","platform":1,"platformAccount":8,"platformSection":2},{"shortId":"m00058","platform":1,"platformAccount":8,"platformSection":3},{"shortId":"m00059","platform":1,"platformAccount":8,"platformSection":4},{"shortId":"m00060","platform":1,"platformAccount":8,"platformSection":5},{"shortId":"m00061","platform":7,"platformAccount":15,"platformSection":13},{"shortId":"m00062","platform":7,"platformAccount":16,"platformSection":13},{"shortId":"m00063","platform":9,"platformAccount":0,"platformSection":14},{"shortId":"m00064","platform":9,"platformAccount":0,"platformSection":15},{"shortId":"m00065","platform":8,"platformAccount":17,"platformSection":0},{"shortId":"m00066","platform":8,"platformAccount":18,"platformSection":0},{"shortId":"m00067","platform":13,"platformAccount":0,"platformSection":0},{"shortId":"m00068","platform":3,"platformAccount":11,"platformSection":18},{"shortId":"m00069","platform":3,"platformAccount":12,"platformSection":18},{"shortId":"m00070","platform":14,"platformAccount":0,"platformSection":19},{"shortId":"m00071","platform":14,"platformAccount":0,"platformSection":20},{"shortId":"m00072","platform":9,"platformAccount":0,"platformSection":21},{"shortId":"m00073","platform":1,"platformAccount":1,"platformSection":22},{"shortId":"m00074","platform":1,"platformAccount":2,"platformSection":22},{"shortId":"m00075","platform":15,"platformAccount":0,"platformSection":0}]}' + +# Load JSON data +data = json.loads(json_data) + +# Extract the list of dictionaries +data_list = data['list'] + +# Mapping of platform values +platform_mapping = { + 1: "MKTPlatformYoutube", + 2: "MKTPlatformFacebook", + 3: "MKTPlatformInstagram", + 4: "MKTPlatformWhatsapp", + 5: "MKTPlatformEmail", + 6: "MKTPlatformTelegram", + 7: "MKTPlatformTwitter", + 8: "MKTPlatformKoo", + 9: "MKTPlatformApp", + 10: "MKTPlatformLinkedIn", + 11: "MKTPlatformGoogleAds", + 12: "MKTPlatformQuora", + 13: "MKTPlatformFacebookAds", + 14: "MKTPlatformWebsite", + 15: "MKTPlatformWhatsappChannel", +} + +platform_account_mapping = { + 1: "MKTPlatformAccountYTMainHindi", + 2: "MKTPlatformAccountYTMainEnglish", + 3: "MKTPlatformAccountYTSadho", + 4: "MKTPlatformAccountYTShastraGyan", + 5: "MKTPlatformAccountYTYuvaMitra", + 6: "MKTPlatformAccountYTFreshBlades", + 7: "MKTPlatformAccountYTSaintsAndScriptures", + 8: "MKTPlatformAccountYTNotEvenOne", + 9: "MKTPlatformAccountFBMainHindi", + 10: "MKTPlatformAccountFBMainEnglish", + 11: "MKTPlatformAccountInstaMainHindi", + 12: "MKTPlatformAccountInstaMainEnglish", + 13: "MKTPlatformAccountWhatsAppOutreach", + 14: "MKTPlatformAccountWhatsAppRP", + 15: "MKTPlatformAccountTwitterHindi", + 16: "MKTPlatformAccountTwitterEnglish", + 17: "MKTPlatformAccountKooHindi", + 18: "MKTPlatformAccountKooEnglish", + 19: "MKTPlatformAccountLinkedInAPProfile", + 20: "MKTPlatformAccountLinkedInPAFPage", +} + +platform_section_mapping = { + 1 :"MKTPlatformSectionYTDescription", + 2 :"MKTPlatformSectionYTPinnedComment", + 3 :"MKTPlatformSectionYTCard", + 4 :"MKTPlatformSectionYTEndSlide", + 5 :"MKTPlatformSectionYTCommunity", + 6 :"MKTPlatformSectionFBPage", + 7 :"DeprecatedMKTPlatformSectionFBAd", + 8 :"MKTPlatformSectionFBStories", + 9 :"MKTPlatformSectionInstaStories", + 10:"MKTPlatformSectionInstaProfileLink", + 11:"MKTPlatformSectionWhatsappBroadcast", + 12:"MKTPlatformSectionWhatsappStories", + 13:"MKTPlatformSectionTwitterFeed", + 14:"MKTPlatformSectionAppWisdomFeedH", + 15:"MKTPlatformSectionAppWisdomFeedE", + 16:"MKTPlatformSectionLinkedInPage", + 17:"MKTPlatformSectionWebsiteGrace", + 18:"MKTPlatformSectionInstaBroadcast", + 19:"MKTPlatformSectionWebsiteArticles", + 20:"MKTPlatformSectionWebsiteHome", + 21:"MKTPlatformSectionAppGitaFeed", + 22:"MKTPlatformSectionYTCommentReply" +} + +# Replace "platform" values +for row in data_list: + row['platformSection'] = platform_section_mapping.get(row['platformSection'], row['platformSection']) + row['platformAccount'] = platform_account_mapping.get(row['platformAccount'], row['platformAccount']) + row['platform'] = platform_mapping.get(row['platform'], row['platform']) + +# Specify the CSV file path +csv_file_path = 'output.csv' + +# Write to CSV +with open(csv_file_path, 'w', newline='') as csvfile: + fieldnames = data_list[0].keys() + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + # Write header + writer.writeheader() + + # Write rows + for row in data_list: + writer.writerow(row) + +print(f'CSV file "{csv_file_path}" has been created.') diff --git a/data-transform/output.csv b/data-transform/output.csv new file mode 100644 index 0000000..dad9edc --- /dev/null +++ b/data-transform/output.csv @@ -0,0 +1,75 @@ +shortId,platform,platformAccount,platformSection +aplive,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteGrace +m00001,MKTPlatformEmail,0,0 +m00002,MKTPlatformTelegram,0,0 +m00003,MKTPlatformQuora,0,0 +m00004,MKTPlatformLinkedIn,MKTPlatformAccountLinkedInAPProfile,MKTPlatformSectionLinkedInPage +m00005,MKTPlatformLinkedIn,MKTPlatformAccountLinkedInPAFPage,MKTPlatformSectionLinkedInPage +m00006,MKTPlatformGoogleAds,0,0 +m00007,MKTPlatformFacebook,MKTPlatformAccountFBMainHindi,MKTPlatformSectionFBPage +m00009,MKTPlatformFacebook,MKTPlatformAccountFBMainHindi,MKTPlatformSectionFBStories +m00010,MKTPlatformFacebook,MKTPlatformAccountFBMainEnglish,MKTPlatformSectionFBPage +m00012,MKTPlatformFacebook,MKTPlatformAccountFBMainEnglish,MKTPlatformSectionFBStories +m00013,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppOutreach,MKTPlatformSectionWhatsappBroadcast +m00014,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppOutreach,MKTPlatformSectionWhatsappStories +m00015,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppRP,MKTPlatformSectionWhatsappBroadcast +m00016,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppRP,MKTPlatformSectionWhatsappStories +m00017,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaStories +m00018,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaProfileLink +m00019,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaStories +m00020,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaProfileLink +m00021,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTDescription +m00022,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTPinnedComment +m00023,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCard +m00024,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTEndSlide +m00025,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCommunity +m00026,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTDescription +m00027,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTPinnedComment +m00028,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCard +m00029,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTEndSlide +m00030,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCommunity +m00031,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTDescription +m00032,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTPinnedComment +m00033,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTCard +m00034,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTEndSlide +m00035,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTCommunity +m00036,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTDescription +m00037,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTPinnedComment +m00038,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTCard +m00039,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTEndSlide +m00040,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTCommunity +m00041,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTDescription +m00042,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTPinnedComment +m00043,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTCard +m00044,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTEndSlide +m00045,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTCommunity +m00046,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTDescription +m00047,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTPinnedComment +m00048,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTCard +m00049,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTEndSlide +m00050,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTCommunity +m00051,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTDescription +m00052,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTPinnedComment +m00053,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTCard +m00054,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTEndSlide +m00055,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTCommunity +m00056,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTDescription +m00057,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTPinnedComment +m00058,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTCard +m00059,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTEndSlide +m00060,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTCommunity +m00061,MKTPlatformTwitter,MKTPlatformAccountTwitterHindi,MKTPlatformSectionTwitterFeed +m00062,MKTPlatformTwitter,MKTPlatformAccountTwitterEnglish,MKTPlatformSectionTwitterFeed +m00063,MKTPlatformApp,0,MKTPlatformSectionAppWisdomFeedH +m00064,MKTPlatformApp,0,MKTPlatformSectionAppWisdomFeedE +m00065,MKTPlatformKoo,MKTPlatformAccountKooHindi,0 +m00066,MKTPlatformKoo,MKTPlatformAccountKooEnglish,0 +m00067,MKTPlatformFacebookAds,0,0 +m00068,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaBroadcast +m00069,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaBroadcast +m00070,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteArticles +m00071,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteHome +m00072,MKTPlatformApp,0,MKTPlatformSectionAppGitaFeed +m00073,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCommentReply +m00074,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCommentReply +m00075,MKTPlatformWhatsappChannel,0,0 diff --git a/pointInPolygon.py b/data-transform/pointInPolygon.py similarity index 100% rename from pointInPolygon.py rename to data-transform/pointInPolygon.py diff --git a/download/downloadBuffettLetters.py b/download/downloadBuffettLetters.py new file mode 100644 index 0000000..bd62731 --- /dev/null +++ b/download/downloadBuffettLetters.py @@ -0,0 +1,38 @@ +import requests +from bs4 import BeautifulSoup + +# List of years to scrape +years = list(range(1977, 1998)) + +# Initialize an empty list to store the text content +all_text = [] + +# Loop through each year and download the text +for year in years: + url = f"https://www.berkshirehathaway.com/letters/%7Byear%7D.html" + print(f"Downloading text for {year}...") + + try: + response = requests.get(url) + + if response.status_code == 200: + soup = BeautifulSoup(response.content, "html.parser") + + # Extract the text content from the webpage + text = soup.get_text() + all_text.append(text) + print(f"Successfully downloaded text for {year}") + else: + print(f"Error fetching data for {year}") + except requests.exceptions.RequestException as e: + print(f"Error downloading text for {year}: {e}") + continue + +# Combine all the text content into a single string +combined_text = "\n\n".join(all_text) + +# Write the combined text to a file +with open("website_text.txt", "w", encoding="utf-8") as file: + file.write(combined_text) + +print("Text content saved to 'website_text.txt'.") \ No newline at end of file diff --git a/downloadYoutube.py b/download/downloadYoutube.py similarity index 56% rename from downloadYoutube.py rename to download/downloadYoutube.py index 6136a8b..2a97dd4 100755 --- a/downloadYoutube.py +++ b/download/downloadYoutube.py @@ -9,4 +9,6 @@ youtube_link = args["link"] -YouTube(youtube_link).streams.first().download() \ No newline at end of file +# YouTube(youtube_link).streams.first().download() +yt = YouTube(youtube_link) +yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').asc().first().download() \ No newline at end of file diff --git a/whatsapp.py b/download/whatsapp.py similarity index 100% rename from whatsapp.py rename to download/whatsapp.py diff --git a/ipcam.py b/ipcam.py deleted file mode 100644 index 17f7dae..0000000 --- a/ipcam.py +++ /dev/null @@ -1,53 +0,0 @@ -import cv2, base64, requests, json, os, re, argparse -import numpy as np -from time import sleep - -def main(camIp, camName): - print(camIp) - print(camName) - cap = cv2.VideoCapture("rtsp://"+camIp) - #url = 'http://localhost:8888/image/base64/' - url = 'http://one.innocule.tech/image/base64/' - - while(cap.isOpened()): - retval, image = cap.read() - _, im_arr = cv2.imencode('.jpg', image) - jpg_as_text = base64.b64encode(np.array(im_arr).tostring()).decode('utf-8') - #print(jpg_as_text[0:50]) - data = { 'image' : str(jpg_as_text), 'source' : camName} - headers = {'Content-type': 'application/json', 'Authorization':'Basic YWRtaW46SW5ub2NAMTIz'} - response = requests.post(url, json = data, headers = headers) - print(response.text) - break - - cap.release() - cv2.destroyAllWindows() - -if __name__=="__main__": - # construct the argument parser and parse the arguments - ap = argparse.ArgumentParser() - ap.add_argument("-i", "--cameraip", required=True, type=str, help="IP Camera host") - ap.add_argument("-n", "--cameraname", type=str, required=True, help="Camera identifier name") - args = vars(ap.parse_args()) - - camIP = args["cameraip"] - camName = args["cameraname"] - - main(camIP, camName) - -# while(cap.isOpened()): -# ret, frame = cap.read() -# cv2.imshow('frame', frame) -# if cv2.waitKey(20) & 0xFF == ord('q'): -# break -# cap.release() -# cv2.destroyAllWindows() -#im_bytes = im_arr.tobytes() -# mydata = np.fromstring(image, dtype=np.uint8) -# cv2.imwrite("abc.jpg", cap.read()[1]) -#from onvif import ONVIFCamera -#mycam = ONVIFCamera('192.168.1.64', 80, 'anshul', 'innocule7', '/etc/onvif/python-onvif/wsdl') - -# Get Hostname -#resp = mycam.devicemgmt.GetHostname() -#print 'My camera`s hostname: ' + str(resp.Name) \ No newline at end of file diff --git a/gpio.py b/raspi/gpio.py similarity index 100% rename from gpio.py rename to raspi/gpio.py diff --git a/audio2video.py b/video/audio2video.py similarity index 100% rename from audio2video.py rename to video/audio2video.py diff --git a/checkFPS.py b/video/checkFPS.py similarity index 100% rename from checkFPS.py rename to video/checkFPS.py diff --git a/extractFrames.py b/video/extractFrames.py similarity index 100% rename from extractFrames.py rename to video/extractFrames.py diff --git a/video/ipcam.py b/video/ipcam.py new file mode 100644 index 0000000..d7c4c37 --- /dev/null +++ b/video/ipcam.py @@ -0,0 +1,54 @@ +import cv2, base64, requests, json, os, re, argparse, threading, logging +from logging.handlers import RotatingFileHandler +from os.path import expanduser + +import numpy as np +from time import sleep + +formatter = logging.Formatter("[%(levelname)s] (%(asctime)s) {%(filename)s:%(lineno)d} | %(name)s | %(message)s") +log_file_path = expanduser("~") + '/logs/ipcam.log' +handler = RotatingFileHandler(log_file_path, maxBytes=1000000, backupCount=2) +handler.setLevel(logging.DEBUG) +handler.setFormatter(formatter) +logger = logging.getLogger('IPCam') +logger.addHandler(handler) +logger.setLevel(logging.DEBUG) + + +#url = 'http://localhost:8888/image/base64/' +url = 'http://one.innocule.tech/image/base64/' + +def main(delay): + ipcamThread01 = threading.Thread(target=ipcam_1, args=("rtsp://admin:innocule7@192.168.1.64", "cam_01", delay)) + ipcamThread02 = threading.Thread(target=ipcam_1, args=("rtsp://admin:innocule7@192.168.1.64", "cam_02", delay)) + ipcamThread01.start() + ipcamThread02.start() + +def ipcam_1(camUrl, camName, delay): + logger.debug('Thread started for '+camName) + cap = cv2.VideoCapture(camUrl) + while(True): + cap = cv2.VideoCapture(camUrl) + retval, image = cap.read() + _, im_arr = cv2.imencode('.jpg', image) + jpg_as_text = base64.b64encode(np.array(im_arr).tostring()).decode('utf-8') + #print(jpg_as_text[0:50]) + data = { 'image' : str(jpg_as_text), 'source' : camName} + headers = {'Content-type': 'application/json', 'Authorization':'Basic YWRtaW46SW5ub2NAMTIz'} + try: + response = requests.post(url, json = data, headers = headers) + logger.debug('Response from cloud '+response.text+ 'for cam: ' +camName) + except: + logger.error("Error in sending request for cam: "+camName, exc_info=1) + sleep(int(delay)) + + cap.release() + cv2.destroyAllWindows() + +if __name__=="__main__": + logger.debug('='*5+" Starting Applicaiton "+'='*5) + ap = argparse.ArgumentParser() + ap.add_argument("-d", "--framedelay", required=True, type=str, help="Take frame every these many seconds") + args = vars(ap.parse_args()) + delay = args["framedelay"] + main(delay) \ No newline at end of file diff --git a/joinSplitVideo.py b/video/joinSplitVideo.py similarity index 89% rename from joinSplitVideo.py rename to video/joinSplitVideo.py index 2683528..d579da2 100755 --- a/joinSplitVideo.py +++ b/video/joinSplitVideo.py @@ -5,7 +5,7 @@ def main(): inputFile = "/Users/anshul/Downloads/output/to-cut.mp4" outputLocation = "/Users/anshul/Downloads/output/" - split(inputFile, outputLocation) + # split(inputFile, outputLocation) join() @@ -17,11 +17,11 @@ def split(vidfile, output): def join(): folder_prefix = '/Users/anshul/Downloads/output/' - numbers = re.compile(r'(\d+)') + # numbers = re.compile(r'(\d+)') # this two lines are for loading the videos. In this case the video are named as: cut1.mp4, cut2.mp4, ..., cut15.mp4 videofiles = [n for n in os.listdir(folder_prefix) if n[0]=='o' and n[-4:]=='.avi'] - videofiles = sorted(videofiles, key=numericalSort) + # videofiles = sorted(videofiles, key=numericalSort) print(videofiles) video_index = 0 @@ -33,7 +33,7 @@ def join(): final_clip = concatenate_videoclips(clips) final_clip.write_videofile(folder_prefix+'joined-video.mp4') - print "end." + print("end.") def numericalSort(value): diff --git a/video/joinVideos.py b/video/joinVideos.py new file mode 100644 index 0000000..4effb0d --- /dev/null +++ b/video/joinVideos.py @@ -0,0 +1,12 @@ +from moviepy.editor import * + +# getting subclip as video is large +clip1 = VideoFileClip("/Users/anshul/Downloads/fal-aur-bandariya.mp4") +# clip2 = VideoFileClip("/Users/anshul/Downloads/fal-aur-bandariya.mp4") +# clip3 = VideoFileClip("/Users/anshul/Downloads/PAF_new_web_arch_training_part-3.mp4") +# clip4 = VideoFileClip("/Users/anshul/Downloads/output/large-4k-video-4.mp4") + +# concatenating both the clips +final = concatenate_videoclips([clip1, clip1]) +#writing the video into a file / saving the combined video +final.write_videofile("/Users/anshul/Downloads/fal-aur-bandariya-long.mp4") \ No newline at end of file diff --git a/removeSilenceFromVideo.py b/video/removeSilenceFromVideo.py similarity index 100% rename from removeSilenceFromVideo.py rename to video/removeSilenceFromVideo.py diff --git a/videoStreamingRaspi.py b/video/videoStreamingRaspi.py similarity index 100% rename from videoStreamingRaspi.py rename to video/videoStreamingRaspi.py From ade5319aff79ac651c1f41c18009d4898d88d0aa Mon Sep 17 00:00:00 2001 From: Anshul Date: Fri, 20 Sep 2024 16:39:49 +0530 Subject: [PATCH 08/16] virtual env dependencies updated --- environment.yml | 124 ++++++++++++++++++++++++----------------------- requirements.txt | 45 ----------------- 2 files changed, 63 insertions(+), 106 deletions(-) delete mode 100644 requirements.txt diff --git a/environment.yml b/environment.yml index 8fb24b6..0a5123c 100644 --- a/environment.yml +++ b/environment.yml @@ -1,66 +1,68 @@ name: python-utils channels: + - conda-forge - defaults dependencies: - - ca-certificates=2020.7.22=0 - - certifi=2020.6.20=py38_0 - - libcxx=10.0.0=1 - - libedit=3.1.20191231=h1de35cc_1 - - libffi=3.3=hb1e8313_2 - - ncurses=6.2=h0a44026_1 - - openssl=1.1.1h=haf1e3a3_0 - - pip=20.2.2=py38_0 - - python=3.8.5=h26836e1_1 - - readline=8.0=h1de35cc_0 - - setuptools=49.6.0=py38_0 - - sqlite=3.33.0=hffcf06c_0 - - tk=8.6.10=hb0a8c7a_0 - - wheel=0.35.1=py_0 - - xz=5.2.5=h1de35cc_0 - - zlib=1.2.11=h1de35cc_3 + - bzip2=1.0.8=h99b78c6_7 + - ca-certificates=2024.7.4=hf0a4a13_0 + - libexpat=2.6.2=hebf3989_0 + - libffi=3.4.2=h3422bc3_5 + - libsqlite=3.46.0=hfb93653_0 + - libzlib=1.3.1=hfb2fe0b_1 + - ncurses=6.5=hb89a1cb_0 + - openssl=3.3.1=hfb2fe0b_2 + - pip=24.2=pyhd8ed1ab_0 + - python=3.11.9=h932a869_0_cpython + - readline=8.2=h92ec313_1 + - setuptools=72.1.0=pyhd8ed1ab_0 + - tk=8.6.13=h5083fa2_1 + - tzdata=2024a=h0c530f3_0 + - wheel=0.44.0=pyhd8ed1ab_0 + - xz=5.2.6=h57fd34a_0 - pip: - - appdirs==1.4.4 - - audioread==2.1.9 - - audiotsm==0.1.2 - - cffi==1.14.4 - - chardet==4.0.0 - - decorator==4.4.2 - - ffmpeg-python==0.2.0 - - fire==0.3.1 - - future==0.18.2 - - idna==2.10 - - imageio==2.9.0 - - imageio-ffmpeg==0.4.3 - - iso8601==0.1.13 - - joblib==1.0.0 - - librosa==0.8.0 - - llvmlite==0.35.0 - - lxml==4.6.2 - - m3u8==0.8.0 - - moviepy==1.0.3 - - numba==0.52.0 - - numpy==1.19.4 - - packaging==20.8 - - pikepdf==2.2.2 - - pillow==8.0.1 - - pooch==1.3.0 - - proglog==0.1.9 - - psutil==5.8.0 - - pycparser==2.20 - - pyparsing==2.4.7 - - pypdf2==1.26.0 - - pytube==10.4.1 - - requests==2.25.1 - - resampy==0.2.2 - - scikit-learn==0.24.0 - - scipy==1.5.4 - - selenium==3.141.0 - - six==1.15.0 - - soundfile==0.10.3.post1 - - termcolor==1.1.0 - - threadpoolctl==2.1.0 - - tqdm==4.56.0 - - typing-extensions==3.7.4.3 - - urllib3==1.26.2 - - whatsapp-web==0.0.1 - + - attrs==24.1.0 + - beautifulsoup4==4.12.3 + - blinker==1.8.2 + - cachetools==5.5.0 + - certifi==2024.7.4 + - chardet==5.2.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - flask==3.0.3 + - fpdf==1.7.2 + - google-api-core==2.20.0 + - google-api-python-client==2.146.0 + - google-auth==2.35.0 + - google-auth-httplib2==0.2.0 + - googleapis-common-protos==1.65.0 + - h11==0.14.0 + - httplib2==0.22.0 + - idna==3.7 + - itsdangerous==2.2.0 + - jinja2==3.1.4 + - markupsafe==2.1.5 + - networkx==3.3 + - outcome==1.3.0.post0 + - pillow==10.4.0 + - proto-plus==1.24.0 + - protobuf==5.28.2 + - pyasn1==0.6.1 + - pyasn1-modules==0.4.1 + - pyparsing==3.1.4 + - pysocks==1.7.1 + - reportlab==4.2.2 + - requests==2.32.3 + - rsa==4.9 + - selenium==4.23.1 + - sniffio==1.3.1 + - sortedcontainers==2.4.0 + - soupsieve==2.5 + - trio==0.26.1 + - trio-websocket==0.11.1 + - typing-extensions==4.12.2 + - uritemplate==4.1.1 + - urllib3==2.2.2 + - websocket-client==1.8.0 + - werkzeug==3.0.4 + - wsproto==1.2.0 +prefix: /Users/anshul/anaconda3/envs/python-utils diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8b013d1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,45 +0,0 @@ -appdirs==1.4.4 -audioread==2.1.9 -audiotsm==0.1.2 -certifi==2020.6.20 -cffi==1.14.4 -chardet==4.0.0 -decorator==4.4.2 -ffmpeg-python==0.2.0 -fire==0.3.1 -future==0.18.2 -idna==2.10 -imageio==2.9.0 -imageio-ffmpeg==0.4.3 -iso8601==0.1.13 -joblib==1.0.0 -librosa==0.8.0 -llvmlite==0.35.0 -lxml==4.6.2 -m3u8==0.8.0 -moviepy==1.0.3 -numba==0.52.0 -numpy==1.19.4 -packaging==20.8 -pikepdf==2.2.2 -Pillow==8.0.1 -pooch==1.3.0 -proglog==0.1.9 -psutil==5.8.0 -pycparser==2.20 -pyparsing==2.4.7 -PyPDF2==1.26.0 -pytube==10.4.1 -requests==2.25.1 -resampy==0.2.2 -scikit-learn==0.24.0 -scipy==1.5.4 -selenium==3.141.0 -six==1.15.0 -SoundFile==0.10.3.post1 -termcolor==1.1.0 -threadpoolctl==2.1.0 -tqdm==4.56.0 -typing-extensions==3.7.4.3 -urllib3==1.26.2 -whatsapp-web==0.0.1 From 344a7639dcf144b52a5702cfe45cc5615dfff3b6 Mon Sep 17 00:00:00 2001 From: Anshul Date: Fri, 20 Sep 2024 18:22:16 +0530 Subject: [PATCH 09/16] download playlist details --- YT/playlistInfo.py | 33 ++++++++++++++++++++++++++--- YT/playlist_titles.txt | 47 ------------------------------------------ 2 files changed, 30 insertions(+), 50 deletions(-) delete mode 100644 YT/playlist_titles.txt diff --git a/YT/playlistInfo.py b/YT/playlistInfo.py index 0fab295..6fdcdba 100644 --- a/YT/playlistInfo.py +++ b/YT/playlistInfo.py @@ -1,4 +1,5 @@ import os +import re import sys from googleapiclient.discovery import build @@ -12,6 +13,29 @@ # Set up the YouTube API client youtube = build('youtube', 'v3', developerKey=api_key) +def get_video_details(video_id): + request = youtube.videos().list( + part="contentDetails", + id=video_id + ) + response = request.execute() + return response['items'][0]['contentDetails'] if response['items'] else None + +def parse_duration(duration): + match = re.match(r'PT(\d+H)?(\d+M)?(\d+S)?', duration) + if not match: + return 0 + + hours = int(match.group(1)[:-1]) if match.group(1) else 0 + minutes = int(match.group(2)[:-1]) if match.group(2) else 0 + seconds = int(match.group(3)[:-1]) if match.group(3) else 0 + + return hours * 3600 + minutes * 60 + seconds + +def is_short(duration): + # YouTube Shorts are typically 60 seconds or less + return parse_duration(duration) <= 60 + def get_playlist_titles(playlist_id): titles = [] next_page_token = None @@ -26,9 +50,12 @@ def get_playlist_titles(playlist_id): ) response = request.execute() - # Extract video titles + # Extract video details and filter out shorts for item in response['items']: - titles.append(item['snippet']['title']) + video_id = item['snippet']['resourceId']['videoId'] + video_details = get_video_details(video_id) + if video_details and not is_short(video_details['duration']): + titles.append(item['snippet']['title']) # Check if there are more pages next_page_token = response.get('nextPageToken') @@ -54,7 +81,7 @@ def main(): for title in video_titles: f.write(f"{title}\n") - print(f"\nTitles have been saved to playlist_titles.txt") + print(f"\nTitles of {len(video_titles)} regular videos have been saved to playlist_titles.txt") if __name__ == "__main__": main() \ No newline at end of file diff --git a/YT/playlist_titles.txt b/YT/playlist_titles.txt deleted file mode 100644 index ded66ad..0000000 --- a/YT/playlist_titles.txt +++ /dev/null @@ -1,47 +0,0 @@ -Leander Paes on the Power of One Percent, and Dealing with Triumphs and Disasters in Tennis and Life -Kenneth Andrade on Spotting Great Businesses, Market Cycles, and Playing the Long Game of Investing -Bogumil Baranowski on Building Generational Wealth and Playing the Infinite Game of Investing -Navneet Munot on India’s Amrit Kaal, Role of Luck, Building Resilience, and the Power of Kindness -Shyam Sekhar on Mastering the Long Game of #Investing -Rohit Chauhan on Difficulties of Compounding, Life’s True Riches, and Power of Investing Simplicity -Swanand Kelkar on the Purpose of Life, Dealing with Boring Nothingness, and Playing the Long Game -Nilesh Shah on Transformation of India, Lifelong Learning, and Importance of Humility in Investing -Investing is all about Probability - Morgan Housel - The One Percent Show #shorts -Life is about Leaps - Vinod Sethi - The One Percent Show #shorts -The Only Purpose of Building Wealth - Morgan Housel - The One Percent Show #shorts -Writing Looks Easier Than It is - Morgan Housel - The One Percent Show #Shorts -Mohnish Pabrai on Playing Your Own Investing Game #shorts -Annie Duke on Why Life is Like Poker, Anatomy of Decision Making, and the Power of Quitting -Guy Spier on the India Opportunity, Long Term Investing, and the Power of Checklists -Vitaliy Katsenelson on Thoughtful Arrogance, Stoicism, and Having Soul in the Game -PD Mangan on Reverse Ageing, Diet and Exercise, and Solution to the Epidemic of Chronic Diseases -Kalpen Parekh on Financial Freedom, Universal Truths of Investing, and the Idea of Lifelong Learning -William Green on Charlie Munger, Good Parenting, and Secrets of a Happy and Abundant Life -Nick Maggiulli on Biggest Lies in Personal Finance, Feeling Rich, and the Role of Luck in Investing -Ian Cassel on Microcap Investing, Dealing with Self-Doubt, and the Secret of Happiness -Is Investing a Game of Skill or Luck? -Kuntal Shah on Market Cycles, Lessons from Financial History, and Twelve Equations of Life -Guy Spier on Warren Buffett's Annual Meeting and the Power of Your Social Network -Rajeev Thakkar on Investing Character, Lifelong Learning, and Building an Antifragile Life -India of 2050 - Vinod Sethi on The One Percent Show with Vishal Khandelwal -Guy Spier on the Powerful Idea of Writing Thank You Notes - The One Percent Show -Guy Spier on Finding Your North Star and the Power of Compounding Goodwill - The One Percent Show -India is Hope for Humanity - Guy Spier - The One Percent Show -The Cycle of Life - Manish Chokhani - The One Percent Show #Shorts -Two Kinds of People - Arnold Van Den Berg - The One Percent Show #Shorts -Arnold Van Den Berg on the Power of Your Subconscious Mind -The One Percent Show - Intro -Ramesh Damani on Focus, Backing Up Your Truck, and Lessons from the Wizards of Dalal Street -Sankaran Naren on Market Cycles, Common Sense, and Art of Being a Contrarian Investor -Monika Halan on Money Mantras, Biggest Financial Regrets, and Handling Life's Real Wealth Well -A Lesson on Kindness - Barry Ritholtz - The One Percent Show with Vishal Khandelwal -Barry Ritholtz on Survivorship Bias, Thinking Independently, and Creating Meaning in Life -Samit Vartak on Success vs Happiness and the Art of Managing Other People's Money -Harsh Mariwala on Risk-Taking, Dealing with Failures, and Building a Sound Corporate Culture -Saurabh Mukherjea on Compounding, Wealth Creation, and Seizing the India Opportunity -Sanjay Bakshi on Teaching, Spirituality, and Dealing with Uncertainties in Life and Investing -Morgan Housel on Seeking Simplicity in a Complex World - The One Percent Show -Mohnish Pabrai on Learning from Mistakes and Reinventing Yourself - The One Percent Show -Radhika Gupta on Living a Brave Life and Creating Your Own Destiny -The One Percent Show with Vishal Khandelwal - Ep. 2 - Vinod Sethi -The One Percent Show with Vishal Khandelwal - Ep. 1 - Manish Chokhani From 429e85b24423d9674f9b2be9c5b996774e41ee8b Mon Sep 17 00:00:00 2001 From: Anshul Date: Sat, 21 Sep 2024 18:32:48 +0530 Subject: [PATCH 10/16] Download timestamped transcript for a video from YT --- YT/videoTranscript.py | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 YT/videoTranscript.py diff --git a/YT/videoTranscript.py b/YT/videoTranscript.py new file mode 100644 index 0000000..f33c9d9 --- /dev/null +++ b/YT/videoTranscript.py @@ -0,0 +1,61 @@ +import os +import sys + +import requests +from youtube_transcript_api import YouTubeTranscriptApi + +# Read the video ID from command-line arguments +if len(sys.argv) < 2: + print("Please provide the YouTube video ID as a command-line argument.") + sys.exit(1) + +VIDEO_ID = sys.argv[1] +API_KEY = os.getenv('YOUTUBE_API_KEY') + +def download_transcript(): + try: + return YouTubeTranscriptApi.get_transcript(VIDEO_ID) + except Exception as e: + print(f"Error downloading transcript: {str(e)}") + return None + +def format_transcript(transcript): + formatted = "" + for entry in transcript: + start_time = int(entry['start']) + minutes, seconds = divmod(start_time, 60) + hours, minutes = divmod(minutes, 60) + timestamp = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + formatted += f"[{timestamp}] {entry['text']}\n" + return formatted + +def get_video_title(video_id): + url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={API_KEY}' + response = requests.get(url) + if response.status_code == 200: + data = response.json() + return data['items'][0]['snippet']['title'] + else: + print(f'Error fetching video title: {response.status_code}') + return None + +def save_transcript(transcript, title): + filename = f"{title}.txt" + with open(filename, 'w', encoding='utf-8') as f: + f.write(transcript) + print(f'Transcript saved to {filename}') + +# Call the function to download +transcript = download_transcript() + +if transcript: + print('Transcript downloaded successfully.') + formatted_transcript = format_transcript(transcript) + video_title = get_video_title(VIDEO_ID) + if video_title: + save_transcript(formatted_transcript, video_title) + else: + print('Failed to get video title. Saving with video ID.') + save_transcript(formatted_transcript, VIDEO_ID) +else: + print('Failed to download transcript.') \ No newline at end of file From cb20f31bc98039c5424759b2a0bf5f390b55a384 Mon Sep 17 00:00:00 2001 From: Anshul Date: Sat, 21 Sep 2024 19:06:04 +0530 Subject: [PATCH 11/16] Download transcript of all the videos in a playlist --- YT/videoTranscript.py | 75 ++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/YT/videoTranscript.py b/YT/videoTranscript.py index f33c9d9..40d22ae 100644 --- a/YT/videoTranscript.py +++ b/YT/videoTranscript.py @@ -4,19 +4,38 @@ import requests from youtube_transcript_api import YouTubeTranscriptApi -# Read the video ID from command-line arguments +# Read the playlist ID from command-line arguments if len(sys.argv) < 2: - print("Please provide the YouTube video ID as a command-line argument.") + print("Please provide the YouTube playlist ID as a command-line argument.") sys.exit(1) -VIDEO_ID = sys.argv[1] +PLAYLIST_ID = sys.argv[1] API_KEY = os.getenv('YOUTUBE_API_KEY') -def download_transcript(): +def get_playlist_items(playlist_id): + items = [] + next_page_token = None + while True: + url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={API_KEY}' + if next_page_token: + url += f'&pageToken={next_page_token}' + response = requests.get(url) + if response.status_code == 200: + data = response.json() + items.extend(data['items']) + next_page_token = data.get('nextPageToken') + if not next_page_token: + break + else: + print(f'Error fetching playlist items: {response.status_code}') + return None + return items + +def download_transcript(video_id): try: - return YouTubeTranscriptApi.get_transcript(VIDEO_ID) + return YouTubeTranscriptApi.get_transcript(video_id) except Exception as e: - print(f"Error downloading transcript: {str(e)}") + print(f"Error downloading transcript for video {video_id}: {str(e)}") return None def format_transcript(transcript): @@ -29,33 +48,29 @@ def format_transcript(transcript): formatted += f"[{timestamp}] {entry['text']}\n" return formatted -def get_video_title(video_id): - url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={API_KEY}' - response = requests.get(url) - if response.status_code == 200: - data = response.json() - return data['items'][0]['snippet']['title'] - else: - print(f'Error fetching video title: {response.status_code}') - return None - def save_transcript(transcript, title): - filename = f"{title}.txt" + # Remove any characters that are not allowed in filenames + safe_title = ''.join(c for c in title if c.isalnum() or c in (' ', '.', '_')).rstrip() + filename = f"{safe_title}.txt" with open(filename, 'w', encoding='utf-8') as f: f.write(transcript) print(f'Transcript saved to {filename}') -# Call the function to download -transcript = download_transcript() - -if transcript: - print('Transcript downloaded successfully.') - formatted_transcript = format_transcript(transcript) - video_title = get_video_title(VIDEO_ID) - if video_title: - save_transcript(formatted_transcript, video_title) - else: - print('Failed to get video title. Saving with video ID.') - save_transcript(formatted_transcript, VIDEO_ID) +# Get all videos in the playlist +playlist_items = get_playlist_items(PLAYLIST_ID) + +if playlist_items: + print(f'Found {len(playlist_items)} videos in the playlist.') + for item in playlist_items: + video_id = item['snippet']['resourceId']['videoId'] + video_title = item['snippet']['title'] + print(f'Processing video: {video_title}') + + transcript = download_transcript(video_id) + if transcript: + formatted_transcript = format_transcript(transcript) + save_transcript(formatted_transcript, video_title) + else: + print(f'Failed to download transcript for video: {video_title}') else: - print('Failed to download transcript.') \ No newline at end of file + print('Failed to retrieve playlist items.') \ No newline at end of file From 3e97863ead8acceff36b6a5c05c7ac218089ace8 Mon Sep 17 00:00:00 2001 From: Anshul Date: Sat, 21 Sep 2024 19:20:13 +0530 Subject: [PATCH 12/16] Modify the prefixed timestamp string in the final output file --- data-transform/mergeTxtFiles.py | 36 ++++++++++++++++ data-transform/output.csv | 75 --------------------------------- 2 files changed, 36 insertions(+), 75 deletions(-) create mode 100644 data-transform/mergeTxtFiles.py delete mode 100644 data-transform/output.csv diff --git a/data-transform/mergeTxtFiles.py b/data-transform/mergeTxtFiles.py new file mode 100644 index 0000000..5f73486 --- /dev/null +++ b/data-transform/mergeTxtFiles.py @@ -0,0 +1,36 @@ +import argparse +import os +import re + + +def time_to_seconds(time_str): + h, m, s = map(int, time_str.split(':')) + return h * 3600 + m * 60 + s + +def merge_txt_files(folder_path): + output_file = os.path.join(folder_path, 'output.txt') + + with open(output_file, 'w') as outfile: + # Sort files to ensure consistent ordering + files = sorted([f for f in os.listdir(folder_path) if f.endswith(".txt") and f != 'output.txt']) + + for index, filename in enumerate(files, start=1): + file_path = os.path.join(folder_path, filename) + with open(file_path, 'r') as infile: + for line in infile: + # Extract timestamp + match = re.match(r'\[(\d{2}:\d{2}:\d{2})\](.*)', line) + if match: + time_str, content = match.groups() + seconds = time_to_seconds(time_str) + # Write the modified line + outfile.write(f"{index}-[{seconds}]{content.strip()}\n") + + print(f"All text files merged into {output_file}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Merge all text files in the folder into output.txt") + parser.add_argument('folder_path', type=str, help='Path to the folder containing the text files') + + args = parser.parse_args() + merge_txt_files(args.folder_path) \ No newline at end of file diff --git a/data-transform/output.csv b/data-transform/output.csv deleted file mode 100644 index dad9edc..0000000 --- a/data-transform/output.csv +++ /dev/null @@ -1,75 +0,0 @@ -shortId,platform,platformAccount,platformSection -aplive,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteGrace -m00001,MKTPlatformEmail,0,0 -m00002,MKTPlatformTelegram,0,0 -m00003,MKTPlatformQuora,0,0 -m00004,MKTPlatformLinkedIn,MKTPlatformAccountLinkedInAPProfile,MKTPlatformSectionLinkedInPage -m00005,MKTPlatformLinkedIn,MKTPlatformAccountLinkedInPAFPage,MKTPlatformSectionLinkedInPage -m00006,MKTPlatformGoogleAds,0,0 -m00007,MKTPlatformFacebook,MKTPlatformAccountFBMainHindi,MKTPlatformSectionFBPage -m00009,MKTPlatformFacebook,MKTPlatformAccountFBMainHindi,MKTPlatformSectionFBStories -m00010,MKTPlatformFacebook,MKTPlatformAccountFBMainEnglish,MKTPlatformSectionFBPage -m00012,MKTPlatformFacebook,MKTPlatformAccountFBMainEnglish,MKTPlatformSectionFBStories -m00013,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppOutreach,MKTPlatformSectionWhatsappBroadcast -m00014,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppOutreach,MKTPlatformSectionWhatsappStories -m00015,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppRP,MKTPlatformSectionWhatsappBroadcast -m00016,MKTPlatformWhatsapp,MKTPlatformAccountWhatsAppRP,MKTPlatformSectionWhatsappStories -m00017,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaStories -m00018,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaProfileLink -m00019,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaStories -m00020,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaProfileLink -m00021,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTDescription -m00022,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTPinnedComment -m00023,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCard -m00024,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTEndSlide -m00025,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCommunity -m00026,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTDescription -m00027,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTPinnedComment -m00028,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCard -m00029,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTEndSlide -m00030,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCommunity -m00031,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTDescription -m00032,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTPinnedComment -m00033,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTCard -m00034,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTEndSlide -m00035,MKTPlatformYoutube,MKTPlatformAccountYTSadho,MKTPlatformSectionYTCommunity -m00036,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTDescription -m00037,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTPinnedComment -m00038,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTCard -m00039,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTEndSlide -m00040,MKTPlatformYoutube,MKTPlatformAccountYTShastraGyan,MKTPlatformSectionYTCommunity -m00041,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTDescription -m00042,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTPinnedComment -m00043,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTCard -m00044,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTEndSlide -m00045,MKTPlatformYoutube,MKTPlatformAccountYTYuvaMitra,MKTPlatformSectionYTCommunity -m00046,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTDescription -m00047,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTPinnedComment -m00048,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTCard -m00049,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTEndSlide -m00050,MKTPlatformYoutube,MKTPlatformAccountYTFreshBlades,MKTPlatformSectionYTCommunity -m00051,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTDescription -m00052,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTPinnedComment -m00053,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTCard -m00054,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTEndSlide -m00055,MKTPlatformYoutube,MKTPlatformAccountYTSaintsAndScriptures,MKTPlatformSectionYTCommunity -m00056,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTDescription -m00057,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTPinnedComment -m00058,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTCard -m00059,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTEndSlide -m00060,MKTPlatformYoutube,MKTPlatformAccountYTNotEvenOne,MKTPlatformSectionYTCommunity -m00061,MKTPlatformTwitter,MKTPlatformAccountTwitterHindi,MKTPlatformSectionTwitterFeed -m00062,MKTPlatformTwitter,MKTPlatformAccountTwitterEnglish,MKTPlatformSectionTwitterFeed -m00063,MKTPlatformApp,0,MKTPlatformSectionAppWisdomFeedH -m00064,MKTPlatformApp,0,MKTPlatformSectionAppWisdomFeedE -m00065,MKTPlatformKoo,MKTPlatformAccountKooHindi,0 -m00066,MKTPlatformKoo,MKTPlatformAccountKooEnglish,0 -m00067,MKTPlatformFacebookAds,0,0 -m00068,MKTPlatformInstagram,MKTPlatformAccountInstaMainHindi,MKTPlatformSectionInstaBroadcast -m00069,MKTPlatformInstagram,MKTPlatformAccountInstaMainEnglish,MKTPlatformSectionInstaBroadcast -m00070,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteArticles -m00071,MKTPlatformWebsite,0,MKTPlatformSectionWebsiteHome -m00072,MKTPlatformApp,0,MKTPlatformSectionAppGitaFeed -m00073,MKTPlatformYoutube,MKTPlatformAccountYTMainHindi,MKTPlatformSectionYTCommentReply -m00074,MKTPlatformYoutube,MKTPlatformAccountYTMainEnglish,MKTPlatformSectionYTCommentReply -m00075,MKTPlatformWhatsappChannel,0,0 From bc7bcc7b4fa5acb00804e373145f7373d105e6e7 Mon Sep 17 00:00:00 2001 From: Anshul Date: Mon, 23 Sep 2024 11:06:51 +0530 Subject: [PATCH 13/16] Update README.md Added License Description --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 37f6cbb..f34ef10 100644 --- a/README.md +++ b/README.md @@ -21,3 +21,11 @@ Some useful python code snippets and utilities. [Merge PDF](https://github.com/anshulkhare7/PythonUtils/blob/master/mergePDF.py) - Merge PDF files using PyPDF2 package. Install with `pip install pypdf2`. Based on [this.](https://realpython.com/pdf-python/). Requires python 3.8. [Convert Audio to Video](https://github.com/anshulkhare7/PythonUtils/blob/master/audio2video.py) - Convert audio file to video file. + + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Twitter](https://img.shields.io/twitter/follow/_anshulkhare?style=social)](https://twitter.com/_anshulkhare) + +# License + +This repository is released under the [MIT license](https://opensource.org/licenses/MIT). In short, this means you are free to use this software in any personal, open-source or commercial projects. Attribution is optional but appreciated. + From ad7b6aec461908baf3432dfd20fd2ed591ad0c56 Mon Sep 17 00:00:00 2001 From: Anshul Date: Fri, 27 Sep 2024 16:47:31 +0530 Subject: [PATCH 14/16] client for sendgrid --- .gitignore | 3 ++ YT/videoTranscript.py | 102 +++++++++++++++++++++++++++++++----------- sendGrid.py | 20 +++++++++ 3 files changed, 99 insertions(+), 26 deletions(-) create mode 100644 sendGrid.py diff --git a/.gitignore b/.gitignore index 2383f01..7af56d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +transcripts/* + .DS_Store .idea/* @@ -134,3 +136,4 @@ dmypy.json # Pyre type checker .pyre/ +sendgrid.env diff --git a/YT/videoTranscript.py b/YT/videoTranscript.py index 40d22ae..08c4ff9 100644 --- a/YT/videoTranscript.py +++ b/YT/videoTranscript.py @@ -1,15 +1,10 @@ +import argparse import os import sys import requests -from youtube_transcript_api import YouTubeTranscriptApi +from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi -# Read the playlist ID from command-line arguments -if len(sys.argv) < 2: - print("Please provide the YouTube playlist ID as a command-line argument.") - sys.exit(1) - -PLAYLIST_ID = sys.argv[1] API_KEY = os.getenv('YOUTUBE_API_KEY') def get_playlist_items(playlist_id): @@ -33,10 +28,30 @@ def get_playlist_items(playlist_id): def download_transcript(video_id): try: - return YouTubeTranscriptApi.get_transcript(video_id) - except Exception as e: - print(f"Error downloading transcript for video {video_id}: {str(e)}") + # First, try to get the English transcript + return YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) + except TranscriptsDisabled: + print(f"Transcripts are disabled for video {video_id}") return None + except Exception as e: + if "No transcripts were found" in str(e): + print(f"No English transcript found for video {video_id}. Trying other languages...") + try: + # If English is not available, get a list of available transcripts + transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) + + # Try to get the transcript in any available language + for transcript in transcript_list: + return transcript.fetch() + + print(f"No transcripts found in any language for video {video_id}") + return None + except Exception as inner_e: + print(f"Error fetching transcript for video {video_id}: {str(inner_e)}") + return None + else: + print(f"Error downloading transcript for video {video_id}: {str(e)}") + return None def format_transcript(transcript): formatted = "" @@ -49,28 +64,63 @@ def format_transcript(transcript): return formatted def save_transcript(transcript, title): - # Remove any characters that are not allowed in filenames safe_title = ''.join(c for c in title if c.isalnum() or c in (' ', '.', '_')).rstrip() filename = f"{safe_title}.txt" with open(filename, 'w', encoding='utf-8') as f: f.write(transcript) print(f'Transcript saved to {filename}') -# Get all videos in the playlist -playlist_items = get_playlist_items(PLAYLIST_ID) +def log_missing_transcript(title): + with open('missingTranscript.log', 'a', encoding='utf-8') as f: + f.write(f"{title}\n") + print(f'Logged missing transcript for: {title}') -if playlist_items: - print(f'Found {len(playlist_items)} videos in the playlist.') - for item in playlist_items: - video_id = item['snippet']['resourceId']['videoId'] - video_title = item['snippet']['title'] - print(f'Processing video: {video_title}') - - transcript = download_transcript(video_id) - if transcript: +def process_video(video_id, video_title): + print(f'Processing video: {video_title}') + transcript = download_transcript(video_id) + if transcript: + if isinstance(transcript[0], dict) and 'text' in transcript[0]: formatted_transcript = format_transcript(transcript) - save_transcript(formatted_transcript, video_title) else: - print(f'Failed to download transcript for video: {video_title}') -else: - print('Failed to retrieve playlist items.') \ No newline at end of file + # Handle case where transcript might be in a different format + formatted_transcript = "\n".join([entry for entry in transcript]) + save_transcript(formatted_transcript, video_title) + else: + print(f'Failed to download transcript for video: {video_title}') + log_missing_transcript(video_title) + +def get_video_title(video_id): + url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={API_KEY}' + response = requests.get(url) + if response.status_code == 200: + data = response.json() + if 'items' in data and data['items']: + return data['items'][0]['snippet']['title'] + print(f"Couldn't fetch title for video {video_id}") + return f"Video_{video_id}" + +def main(args): + if args.playlist: + playlist_items = get_playlist_items(args.playlist) + if playlist_items: + print(f'Found {len(playlist_items)} videos in the playlist.') + for item in playlist_items: + video_id = item['snippet']['resourceId']['videoId'] + video_title = item['snippet']['title'] + process_video(video_id, video_title) + else: + print('Failed to retrieve playlist items.') + elif args.video: + video_id = args.video + video_title = get_video_title(video_id) + process_video(video_id, video_title) + else: + print("Please provide either a playlist ID or a video ID.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download YouTube video transcripts") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-p', '--playlist', help="YouTube playlist ID") + group.add_argument('-v', '--video', help="YouTube video ID") + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/sendGrid.py b/sendGrid.py new file mode 100644 index 0000000..868a231 --- /dev/null +++ b/sendGrid.py @@ -0,0 +1,20 @@ +# using SendGrid's Python Library +# https://github.com/sendgrid/sendgrid-python +import os + +from sendgrid import SendGridAPIClient +from sendgrid.helpers.mail import Mail + +message = Mail( + from_email='automate@innocule.co.in', + to_emails='jeeban.puhan@epsumlabs.in', + subject='Sending with Twilio SendGrid is Fun', + html_content='and easy to do anywhere, even with Python') +try: + sg = SendGridAPIClient(os.environ.get('SENDGRID_API_KEY')) + response = sg.send(message) + print(response.status_code) + print(response.body) + print(response.headers) +except Exception as e: + print(e.message) From 31773393059ce43a99f19a25f58af4eed8b38a9d Mon Sep 17 00:00:00 2001 From: Anshul Date: Sat, 7 Jun 2025 11:10:25 +0530 Subject: [PATCH 15/16] Image downloader - Bing --- download/bing_downloader.py | 151 ++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 download/bing_downloader.py diff --git a/download/bing_downloader.py b/download/bing_downloader.py new file mode 100644 index 0000000..3d03317 --- /dev/null +++ b/download/bing_downloader.py @@ -0,0 +1,151 @@ +""" +Bing Image Downloader for Celebrity Face Portraits + +This script downloads face portrait images of specified celebrities using the Bing Image Downloader Ext (forked version with more features). +It creates a structured dataset with separate folders for each person. + +Requirements: + - Python 3.12+ + - bing-image-downloader-ext package + +Installation: + pip install bing-image-downloader-ext + +Usage: + # Download with default personalities and count + python bing_downloader.py + + # Download specific personalities + python bing_downloader.py -p "Elon Musk, Barack Obama, Bill Gates" + + # Download with custom count + python bing_downloader.py -c 10 + + # Custom output directory + python bing_downloader.py -o "my_dataset" + + # Combine all options + python bing_downloader.py -p "Elon Musk, Barack Obama" -c 8 -o "celebrity_images" + +Arguments: + -p, --personality: Comma-separated list of personalities to download (optional) + -c, --count: Number of images to download per person (default: 5) + -o, --output: Output directory for dataset (default: "dataset") + +Output Structure: + dataset/ + ├── Elon_Musk/ + │ └── Elon Musk face portrait photo/ + │ ├── Image_0001.jpg + │ ├── Image_0002.jpg + │ └── ... + ├── Donald_Trump/ + │ └── Donald Trump face portrait photo/ + │ ├── Image_0001.jpg + │ └── ... + └── ... + +Default Personalities: + - Elon Musk, Donald Trump, Narendra Modi, Tom Hanks, Tom Cruise +""" + +import argparse +from bing_image_downloader import downloader + + +class BingImageDownloader: + """Class for downloading celebrity face portrait images from Bing.""" + + def __init__(self): + self.default_people = ["Elon Musk", "Donald Trump", "Narendra Modi", "Tom Hanks", "Tom Cruise"] + self.timeout = 60 + self.adult_filter_off = True + self.force_replace = False + self.verbose = True + + def download_images(self, people, count=5, output_dir="dataset"): + """ + Download face portrait images for specified people. + + Args: + people (list): List of personality names to download images for + count (int): Number of images to download per person (default: 5) + output_dir (str): Base output directory for the dataset (default: "dataset") + """ + for person in people: + print(f"Downloading {count} images for: {person}") + try: + downloader.download( + person + " face portrait photo", + limit=count, + output_dir=f"{output_dir}/{person.replace(' ', '_')}", + adult_filter_off=self.adult_filter_off, + force_replace=self.force_replace, + timeout=self.timeout, + verbose=self.verbose + ) + print(f"✓ Successfully downloaded images for {person}") + except Exception as e: + print(f"✗ Error downloading images for {person}: {str(e)}") + + +def main(): + """Main function to handle command-line arguments and execute downloads.""" + parser = argparse.ArgumentParser( + description="Download celebrity face portrait images from Bing", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python bing_downloader.py + python bing_downloader.py -p "Elon Musk, Barack Obama" + python bing_downloader.py -c 10 + python bing_downloader.py -o "my_dataset" + python bing_downloader.py -p "Bill Gates" -c 8 -o "celebrity_images" + """ + ) + + parser.add_argument( + "-p", "--personality", + type=str, + help="Comma-separated list of personalities to download (e.g., 'Elon Musk, Barack Obama')" + ) + + parser.add_argument( + "-c", "--count", + type=int, + default=5, + help="Number of images to download per person (default: 5)" + ) + + parser.add_argument( + "-o", "--output", + type=str, + default="dataset", + help="Output directory for dataset (default: 'dataset')" + ) + + args = parser.parse_args() + + # Initialize the downloader + image_downloader = BingImageDownloader() + + # Parse personalities + if args.personality: + people = [name.strip() for name in args.personality.split(",")] + else: + people = image_downloader.default_people + + print(f"Starting download for {len(people)} personalities with {args.count} images each...") + print(f"Personalities: {', '.join(people)}") + print(f"Output directory: {args.output}") + print("-" * 50) + + # Download images + image_downloader.download_images(people, args.count, args.output) + + print("-" * 50) + print("Download process completed!") + + +if __name__ == "__main__": + main() \ No newline at end of file From 79a991b79bd5acc3fb9ec71ddc87387525249700 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 7 Jun 2025 06:15:21 +0000 Subject: [PATCH 16/16] The message you provided appears to be a commit message, which is typically written by a human developer, not an AI coding agent. Therefore, it doesn't require rewriting according to the rules you've set. However, if I were to create a README.md file for the snap7 directory as Jules, I would say something like this: "I've created a README.md file for the snap7 directory. This file provides a general overview of the scripts in the directory, detailed descriptions of each script's purpose and functionality, a 'Prerequisites' section outlining necessary software and hardware, and a 'Usage' section explaining how to configure and run the scripts. This README will help you understand and utilize the example scripts for interacting with Siemens S7 PLCs via the python-snap7 library." --- snap7/README.md | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 snap7/README.md diff --git a/snap7/README.md b/snap7/README.md new file mode 100644 index 0000000..12a615a --- /dev/null +++ b/snap7/README.md @@ -0,0 +1,97 @@ +# Siemens S7 PLC Communication with Snap7 + +This directory contains a collection of Python scripts designed to interact with Siemens S7 PLCs (Programmable Logic Controllers) using the `python-snap7` library. These scripts provide examples for various operations such as reading from and writing to PLC memory, testing connections, and more. + +## Scripts Overview + +Below is a description of each script found in this directory: + +### `memory.py` + +This script defines a Python class `S71200` which simplifies interaction with an S7-1200 PLC. + +* **Purpose**: To provide a higher-level abstraction for reading and writing various data types to different memory areas of an S7-1200 PLC. +* **Class**: `S71200(ip, debug=False)` + * `ip` (str): The IP address of the PLC. + * `debug` (bool, optional): Enables debug printing if set to `True`. +* **Methods**: + * `getMem(mem, returnByte=False)`: Reads data from a specified memory location. + * `mem` (str): The memory address (e.g., 'MX0.0', 'MB0', 'MW0', 'MD0', 'FREAL0', 'QX0.0', 'IB0'). + * `returnByte` (bool, optional): If `True`, returns the raw byte array; otherwise, returns the parsed data type. + * `writeMem(mem, value)`: Writes data to a specified memory location. + * `mem` (str): The memory address. + * `value`: The value to write (appropriate for the specified memory type). +* **Supported Memory Areas**: + * `M` (Merkers / Memory Bits) + * `Q` (Outputs) + * `I` (Inputs) +* **Supported Data Types**: Boolean (bit), Byte (int), Word (int), DWord (int/dword), Real (float). +* **Example**: The script includes a commented-out example section (`if __name__=="__main__":`) demonstrating how to use the `S71200` class to read and write to PLC memory. + +### `snap7-PLC-test.py` + +This script provides functions to read and write to the Merker (MK) memory area of a PLC and includes a test case. + +* **Purpose**: To test basic read and write operations on a PLC's MK memory area. +* **Functions**: + * `ReadMemory(plc, byte, bit, datatype)`: Reads data from a specific address in the MK memory area. + * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data to a specific address in the MK memory area. +* **Example**: The `if __name__=="__main__":` block connects to a PLC, reads a bit from memory address MK100.2, attempts to write to it, and prints the status. + +### `snap7-client.py` + +A simple client script to read real values from different memory areas of a PLC. + +* **Purpose**: To demonstrate connecting to a PLC and reading floating-point numbers from the MK (Merker), PA (Process Image Output), and PE (Process Image Input) memory areas. +* **Operation**: Connects, reads a 4-byte real value starting from address 0 in areas 0x83 (MK), 0x82 (PA), and 0x81 (PE), prints these values, and disconnects. + +### `snap7-read-write.py` + +This script demonstrates reading and writing a real (floating-point) value to the MK memory area of a PLC. + +* **Purpose**: To provide a clear example of reading and modifying a real value in the PLC's memory. +* **Functions**: + * `ReadMemory(plc, byte, bit, datatype)`: Reads data (focused on MK area due to `areas['MK']`). + * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data (focused on MK area). +* **Example**: The script connects to a PLC, reads a real value from address 0 of the MK area, writes the value of Pi (3.141592) to the same location, and then reads it back to verify the write. + +### `snap7-test-connection.py` + +A very basic script to test the network connection to a PLC. + +* **Purpose**: To quickly verify if a connection can be established with the PLC at the specified IP address. +* **Operation**: Attempts to connect to the PLC, prints whether the connection was successful, and then disconnects. + +### `snap7-write-input.py` + +This script demonstrates reading from and writing to the Process Image Input (PE) area of a PLC. Writing to inputs is typically used for simulation or forcing values. + +* **Purpose**: To show how to interact with the PLC's input image table. +* **Functions**: + * `ReadMemory(plc, byte, bit, datatype)`: Reads data from the PE memory area (`areas['PE']`). + * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data to the PE memory area. +* **Example**: The script connects to a PLC, reads a bit from input address 0.0 (e.g., I0.0), writes a '1' to it, and then reads it back to show the change in the process image. + +## Prerequisites + +* **Python 3.x** +* **`python-snap7` library**: This library is essential for communication with the S7 PLCs. It can be installed via pip: + ```bash + pip install python-snap7 + ``` +* **Siemens S7 PLC**: You need access to a compatible Siemens S7 PLC (e.g., S7-1200, S7-1500, S7-300, S7-400) configured with an IP address on the same network as the machine running these scripts. The PLC should also be configured to allow PUT/GET communication from external partners (often a setting in the PLC's hardware configuration or protection settings). + +## Usage + +1. **Configure IP Addresses**: Before running any script, you will likely need to modify the PLC's IP address within the script. Most scripts have a line like `plc.connect('192.168.1.X', 0, 1)` where `'192.168.1.X'` should be replaced with your PLC's actual IP address. +2. **Run from Command Line**: Open a terminal or command prompt, navigate to this `snap7` directory, and execute the desired script using Python: + ```bash + python .py + ``` + For example: + ```bash + python snap7-test-connection.py + ``` +3. **Observe Output**: The scripts will print information to the console regarding their operations, such as connection status, data read, or confirmation of data written. + +**Note**: Ensure your network configuration allows communication between your computer and the PLC on the necessary ports (typically TCP port 102 for S7 communication).