diff --git a/.gitignore b/.gitignore
index 4236406..7af56d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
+transcripts/*
+
+.DS_Store
+.idea/*
+
 # media files
 *.mp4
 *.pdf
@@ -131,3 +136,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+sendgrid.env
diff --git a/README.md b/README.md
index 814d249..f34ef10 100644
--- a/README.md
+++ b/README.md
@@ -19,3 +19,13 @@ Some useful python code snippets and utilities.
 [Snap7](https://github.com/anshulkhare7/PythonUtils/blob/master/snap7) - Snap7 is a python library to connect to Siemens PLCs.
 
 [Merge PDF](https://github.com/anshulkhare7/PythonUtils/blob/master/mergePDF.py) - Merge PDF files using PyPDF2 package. Install with `pip install pypdf2`. Based on [this.](https://realpython.com/pdf-python/). Requires python 3.8.
+
+[Convert Audio to Video](https://github.com/anshulkhare7/PythonUtils/blob/master/video/audio2video.py) - Convert an audio file to a video file.
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Twitter](https://img.shields.io/twitter/follow/_anshulkhare?style=social)](https://twitter.com/_anshulkhare)
+
+# License
+
+This repository is released under the [MIT license](https://opensource.org/licenses/MIT). In short, this means you are free to use this software in any personal, open-source, or commercial project. Attribution is optional but appreciated.
+
diff --git a/YT/playlistInfo.py b/YT/playlistInfo.py
new file mode 100644
index 0000000..6fdcdba
--- /dev/null
+++ b/YT/playlistInfo.py
@@ -0,0 +1,87 @@
+import os
+import re
+import sys
+
+from googleapiclient.discovery import build
+
+# Read the API key from an environment variable
+api_key = os.environ.get('YOUTUBE_API_KEY')
+
+if not api_key:
+    raise ValueError("Please set the YOUTUBE_API_KEY environment variable")
+
+# Set up the YouTube API client
+youtube = build('youtube', 'v3', developerKey=api_key)
+
+def get_video_details(video_id):
+    request = youtube.videos().list(
+        part="contentDetails",
+        id=video_id
+    )
+    response = request.execute()
+    return response['items'][0]['contentDetails'] if response['items'] else None
+
+def parse_duration(duration):
+    # Durations come back in ISO 8601 form, e.g. PT1H2M3S
+    match = re.match(r'PT(\d+H)?(\d+M)?(\d+S)?', duration)
+    if not match:
+        return 0
+
+    hours = int(match.group(1)[:-1]) if match.group(1) else 0
+    minutes = int(match.group(2)[:-1]) if match.group(2) else 0
+    seconds = int(match.group(3)[:-1]) if match.group(3) else 0
+
+    return hours * 3600 + minutes * 60 + seconds
+
+def is_short(duration):
+    # YouTube Shorts are typically 60 seconds or less
+    return parse_duration(duration) <= 60
+
+def get_playlist_titles(playlist_id):
+    titles = []
+    next_page_token = None
+
+    while True:
+        # Make the API request
+        request = youtube.playlistItems().list(
+            part='snippet',
+            playlistId=playlist_id,
+            maxResults=50,
+            pageToken=next_page_token
+        )
+        response = request.execute()
+
+        # Extract video details and filter out shorts
+        for item in response['items']:
+            video_id = item['snippet']['resourceId']['videoId']
+            video_details = get_video_details(video_id)
+            if video_details and not is_short(video_details['duration']):
+                titles.append(item['snippet']['title'])
+
+        # Check if there are more pages
+        next_page_token = response.get('nextPageToken')
+        if not next_page_token:
+            break
+
+    return titles
+
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python playlistInfo.py <playlist_id>")
+        sys.exit(1)
+
+    playlist_id = sys.argv[1]
+    video_titles = get_playlist_titles(playlist_id)
+
+    # Print the titles
+    for title in video_titles:
print(title) + + # Save titles to a file + with open('playlist_titles.txt', 'w', encoding='utf-8') as f: + for title in video_titles: + f.write(f"{title}\n") + + print(f"\nTitles of {len(video_titles)} regular videos have been saved to playlist_titles.txt") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/YT/videoTranscript.py b/YT/videoTranscript.py new file mode 100644 index 0000000..08c4ff9 --- /dev/null +++ b/YT/videoTranscript.py @@ -0,0 +1,126 @@ +import argparse +import os +import sys + +import requests +from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi + +API_KEY = os.getenv('YOUTUBE_API_KEY') + +def get_playlist_items(playlist_id): + items = [] + next_page_token = None + while True: + url = f'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&maxResults=50&playlistId={playlist_id}&key={API_KEY}' + if next_page_token: + url += f'&pageToken={next_page_token}' + response = requests.get(url) + if response.status_code == 200: + data = response.json() + items.extend(data['items']) + next_page_token = data.get('nextPageToken') + if not next_page_token: + break + else: + print(f'Error fetching playlist items: {response.status_code}') + return None + return items + +def download_transcript(video_id): + try: + # First, try to get the English transcript + return YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) + except TranscriptsDisabled: + print(f"Transcripts are disabled for video {video_id}") + return None + except Exception as e: + if "No transcripts were found" in str(e): + print(f"No English transcript found for video {video_id}. Trying other languages...") + try: + # If English is not available, get a list of available transcripts + transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) + + # Try to get the transcript in any available language + for transcript in transcript_list: + return transcript.fetch() + + print(f"No transcripts found in any language for video {video_id}") + return None + except Exception as inner_e: + print(f"Error fetching transcript for video {video_id}: {str(inner_e)}") + return None + else: + print(f"Error downloading transcript for video {video_id}: {str(e)}") + return None + +def format_transcript(transcript): + formatted = "" + for entry in transcript: + start_time = int(entry['start']) + minutes, seconds = divmod(start_time, 60) + hours, minutes = divmod(minutes, 60) + timestamp = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + formatted += f"[{timestamp}] {entry['text']}\n" + return formatted + +def save_transcript(transcript, title): + safe_title = ''.join(c for c in title if c.isalnum() or c in (' ', '.', '_')).rstrip() + filename = f"{safe_title}.txt" + with open(filename, 'w', encoding='utf-8') as f: + f.write(transcript) + print(f'Transcript saved to {filename}') + +def log_missing_transcript(title): + with open('missingTranscript.log', 'a', encoding='utf-8') as f: + f.write(f"{title}\n") + print(f'Logged missing transcript for: {title}') + +def process_video(video_id, video_title): + print(f'Processing video: {video_title}') + transcript = download_transcript(video_id) + if transcript: + if isinstance(transcript[0], dict) and 'text' in transcript[0]: + formatted_transcript = format_transcript(transcript) + else: + # Handle case where transcript might be in a different format + formatted_transcript = "\n".join([entry for entry in transcript]) + save_transcript(formatted_transcript, video_title) + else: + print(f'Failed to download transcript for 
video: {video_title}') + log_missing_transcript(video_title) + +def get_video_title(video_id): + url = f'https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={API_KEY}' + response = requests.get(url) + if response.status_code == 200: + data = response.json() + if 'items' in data and data['items']: + return data['items'][0]['snippet']['title'] + print(f"Couldn't fetch title for video {video_id}") + return f"Video_{video_id}" + +def main(args): + if args.playlist: + playlist_items = get_playlist_items(args.playlist) + if playlist_items: + print(f'Found {len(playlist_items)} videos in the playlist.') + for item in playlist_items: + video_id = item['snippet']['resourceId']['videoId'] + video_title = item['snippet']['title'] + process_video(video_id, video_title) + else: + print('Failed to retrieve playlist items.') + elif args.video: + video_id = args.video + video_title = get_video_title(video_id) + process_video(video_id, video_title) + else: + print("Please provide either a playlist ID or a video ID.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download YouTube video transcripts") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('-p', '--playlist', help="YouTube playlist ID") + group.add_argument('-v', '--video', help="YouTube video ID") + args = parser.parse_args() + main(args) \ No newline at end of file diff --git a/convert/generatePDF.py b/convert/generatePDF.py new file mode 100644 index 0000000..3eba208 --- /dev/null +++ b/convert/generatePDF.py @@ -0,0 +1,35 @@ +import os +import random +from fpdf import FPDF + +def generate_pdf(file_path, size_kb): + pdf = FPDF() + pdf.add_page() + pdf.set_font('Arial', size=12) + # Add some text content to the PDF + pdf.cell(200, 10, txt="Sample PDF content", ln=True, align='C') + + # Save the PDF file + pdf.output(file_path) + + # Adjust the file size + with open(file_path, 'ab') as f: + current_size = os.path.getsize(file_path) + target_size = size_kb * 1024 + if current_size < target_size: + # Add padding to reach the target size + f.write(b'0' * (target_size - current_size)) + +def generate_sample_pdfs(prefix, min_size_kb, max_size_kb, num_files=100, folder="output"): + # Create the folder if it doesn't exist + os.makedirs(folder, exist_ok=True) + + for i in range(num_files): + size_kb = random.randint(min_size_kb, max_size_kb) + file_name = f"{prefix}_{size_kb}KB_{i+1}.pdf" + file_path = os.path.join(folder, file_name) + generate_pdf(file_path, size_kb) + print(f"Generated: {file_path} of size {size_kb} KB") + +# Example usage: Specify folder where the files will be created +generate_sample_pdfs(prefix="sample", min_size_kb=100, max_size_kb=1024, num_files=25, folder="/Users/anshul/tmp/s3") \ No newline at end of file diff --git a/mergePDF.py b/convert/mergePDF.py similarity index 64% rename from mergePDF.py rename to convert/mergePDF.py index c64029e..4c78890 100755 --- a/mergePDF.py +++ b/convert/mergePDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from PyPDF2 import PdfFileReader, PdfFileWriter -import pikepdf +#import pikepdf def remove_page(path, output, page_to_Remove): pdf_writer = PdfFileWriter() @@ -70,13 +70,24 @@ def decrypt_pdf2(input_path, output_path, password): print("Total pages:", num_pages) if __name__ == '__main__': - doc_1 = '/Users/anshul/Downloads/input01.pdf' - doc_2 = '/Users/anshul/Downloads/input02.pdf' + doc_1 = '/Users/anshul/Documents/docs/E-Aadhar-password-ANSH1981.pdf' + doc_2 = 
'/Users/anshul/Documents/docs/E-Aadhar-password-ANSH1981-dec.pdf' + # doc_3 = '/Users/anshul/Downloads/Shruti-Quantum-9991336776.pdf' + # doc_4 = '/Users/anshul/Downloads/Shruti-Quantum-9991353164.pdf' + # doc_5 = '/Users/anshul/Downloads/page-05.pdf' + # doc_6 = '/Users/anshul/Downloads/page-06.pdf' + # doc_7 = '/Users/anshul/Downloads/page-07.pdf' + # doc_8 = '/Users/anshul/Downloads/page-08.pdf' + # doc_9 = '/Users/anshul/Downloads/page-09.pdf' + # doc_10 = '/Users/anshul/Downloads/page-10.pdf' + #extract_information('/Users/anshul/Downloads/abc.pdf') - paths = [doc_1, doc_2] - merge_pdfs(paths, output='/Users/anshul/Downloads/output.pdf') - #remove_page(doc_1,doc_2,1) - #encrypted = '/Users/anshul/Downloads/abc.pdf' - #decrypted = '/Users/anshul/Downloads/abc.pdf' - ##password = 'password' - #decrypt_pdf(encrypted, decrypted, password) + # paths = [doc_1, doc_2, doc_3, doc_4, doc_5, doc_6, doc_7, doc_8, doc_9, doc_10 ] + # paths = [doc_1, doc_2, doc_3] + # merge_pdfs(paths, output='/Users/anshul/Downloads/life-certificate-2021.pdf') + # remove_page(doc_1,doc_2,2) + # encrypted = '/Users/anshul/Downloads/ELSS-Statement-encrypted.pdf' + # decrypted = '/Users/anshul/Downloads/ELSS-Statement.pdf' + # password = 'AMYPK6172H' + decrypt_pdf(doc_1, doc_2, "ANSH1981") + # decrypt_pdf(doc_2, doc_4, "BLXPS4482F") diff --git a/convert/onlineBookToPDF.py b/convert/onlineBookToPDF.py new file mode 100644 index 0000000..8d4babc --- /dev/null +++ b/convert/onlineBookToPDF.py @@ -0,0 +1,99 @@ +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin, urlparse +from reportlab.lib.pagesizes import letter +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image +from reportlab.lib.styles import getSampleStyleSheet +from io import BytesIO +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +import networkx as nx + +class WebsiteToPDFConverter: + def __init__(self, base_url): + self.base_url = base_url + self.domain = urlparse(base_url).netloc + self.graph = nx.DiGraph() + self.content = [] + self.styles = getSampleStyleSheet() + + # Set up Selenium WebDriver + chrome_options = Options() + chrome_options.add_argument("--headless") + self.driver = webdriver.Chrome(options=chrome_options) + + def get_soup(self, url): + self.driver.get(url) + WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + return BeautifulSoup(self.driver.page_source, 'html.parser') + + def extract_content(self, url, soup): + main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content') + if main_content: + for element in main_content.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'img']): + if element.name.startswith('h'): + self.content.append(Paragraph(element.text, self.styles[element.name])) + elif element.name == 'p': + self.content.append(Paragraph(element.text, self.styles['Normal'])) + elif element.name == 'img': + img_url = urljoin(url, element['src']) + try: + img_data = BytesIO(requests.get(img_url).content) + img = Image(img_data, width=300, height=200) + self.content.append(img) + except Exception as e: + print(f"Error processing image {img_url}: {e}") + self.content.append(Spacer(1, 12)) + + def build_site_graph(self, url, parent=None): + if url in self.graph: + return + + print(f"Mapping: {url}") + 
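+        # Register the page as a graph node and link it to the page that
+        # referenced it, so process_site() can later walk the site in DFS order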
self.graph.add_node(url) + if parent: + self.graph.add_edge(parent, url) + + soup = self.get_soup(url) + nav_menu = soup.find('nav') or soup.find('ul', class_='menu') + + if nav_menu: + for link in nav_menu.find_all('a', href=True): + next_url = urljoin(url, link['href']) + if next_url.startswith(self.base_url) and self.domain in next_url: + self.build_site_graph(next_url, url) + + def process_site(self): + self.build_site_graph(self.base_url) + + for url in nx.dfs_preorder_nodes(self.graph, self.base_url): + print(f"Processing: {url}") + soup = self.get_soup(url) + self.extract_content(url, soup) + + def create_pdf(self, output_filename): + doc = SimpleDocTemplate(output_filename, pagesize=letter) + doc.build(self.content) + + def cleanup(self): + self.driver.quit() + +def main(): + base_url = input("Enter the base URL of the website: ") + output_filename = input("Enter the output PDF filename: ") + + converter = WebsiteToPDFConverter(base_url) + try: + converter.process_site() + converter.create_pdf(output_filename) + print(f"PDF created: {output_filename}") + finally: + converter.cleanup() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/data-transform/main.py b/data-transform/main.py new file mode 100644 index 0000000..df82fa5 --- /dev/null +++ b/data-transform/main.py @@ -0,0 +1,101 @@ +import csv +import json + +# Your JSON data +json_data = '{"list":[{"shortId":"aplive","platform":14,"platformAccount":0,"platformSection":17},{"shortId":"m00001","platform":5,"platformAccount":0,"platformSection":0},{"shortId":"m00002","platform":6,"platformAccount":0,"platformSection":0},{"shortId":"m00003","platform":12,"platformAccount":0,"platformSection":0},{"shortId":"m00004","platform":10,"platformAccount":19,"platformSection":16},{"shortId":"m00005","platform":10,"platformAccount":20,"platformSection":16},{"shortId":"m00006","platform":11,"platformAccount":0,"platformSection":0},{"shortId":"m00007","platform":2,"platformAccount":9,"platformSection":6},{"shortId":"m00009","platform":2,"platformAccount":9,"platformSection":8},{"shortId":"m00010","platform":2,"platformAccount":10,"platformSection":6},{"shortId":"m00012","platform":2,"platformAccount":10,"platformSection":8},{"shortId":"m00013","platform":4,"platformAccount":13,"platformSection":11},{"shortId":"m00014","platform":4,"platformAccount":13,"platformSection":12},{"shortId":"m00015","platform":4,"platformAccount":14,"platformSection":11},{"shortId":"m00016","platform":4,"platformAccount":14,"platformSection":12},{"shortId":"m00017","platform":3,"platformAccount":11,"platformSection":9},{"shortId":"m00018","platform":3,"platformAccount":11,"platformSection":10},{"shortId":"m00019","platform":3,"platformAccount":12,"platformSection":9},{"shortId":"m00020","platform":3,"platformAccount":12,"platformSection":10},{"shortId":"m00021","platform":1,"platformAccount":1,"platformSection":1},{"shortId":"m00022","platform":1,"platformAccount":1,"platformSection":2},{"shortId":"m00023","platform":1,"platformAccount":1,"platformSection":3},{"shortId":"m00024","platform":1,"platformAccount":1,"platformSection":4},{"shortId":"m00025","platform":1,"platformAccount":1,"platformSection":5},{"shortId":"m00026","platform":1,"platformAccount":2,"platformSection":1},{"shortId":"m00027","platform":1,"platformAccount":2,"platformSection":2},{"shortId":"m00028","platform":1,"platformAccount":2,"platformSection":3},{"shortId":"m00029","platform":1,"platformAccount":2,"platformSection":4},{"shortId":"m00030","platform":1,"platformAcco
unt":2,"platformSection":5},{"shortId":"m00031","platform":1,"platformAccount":3,"platformSection":1},{"shortId":"m00032","platform":1,"platformAccount":3,"platformSection":2},{"shortId":"m00033","platform":1,"platformAccount":3,"platformSection":3},{"shortId":"m00034","platform":1,"platformAccount":3,"platformSection":4},{"shortId":"m00035","platform":1,"platformAccount":3,"platformSection":5},{"shortId":"m00036","platform":1,"platformAccount":4,"platformSection":1},{"shortId":"m00037","platform":1,"platformAccount":4,"platformSection":2},{"shortId":"m00038","platform":1,"platformAccount":4,"platformSection":3},{"shortId":"m00039","platform":1,"platformAccount":4,"platformSection":4},{"shortId":"m00040","platform":1,"platformAccount":4,"platformSection":5},{"shortId":"m00041","platform":1,"platformAccount":5,"platformSection":1},{"shortId":"m00042","platform":1,"platformAccount":5,"platformSection":2},{"shortId":"m00043","platform":1,"platformAccount":5,"platformSection":3},{"shortId":"m00044","platform":1,"platformAccount":5,"platformSection":4},{"shortId":"m00045","platform":1,"platformAccount":5,"platformSection":5},{"shortId":"m00046","platform":1,"platformAccount":6,"platformSection":1},{"shortId":"m00047","platform":1,"platformAccount":6,"platformSection":2},{"shortId":"m00048","platform":1,"platformAccount":6,"platformSection":3},{"shortId":"m00049","platform":1,"platformAccount":6,"platformSection":4},{"shortId":"m00050","platform":1,"platformAccount":6,"platformSection":5},{"shortId":"m00051","platform":1,"platformAccount":7,"platformSection":1},{"shortId":"m00052","platform":1,"platformAccount":7,"platformSection":2},{"shortId":"m00053","platform":1,"platformAccount":7,"platformSection":3},{"shortId":"m00054","platform":1,"platformAccount":7,"platformSection":4},{"shortId":"m00055","platform":1,"platformAccount":7,"platformSection":5},{"shortId":"m00056","platform":1,"platformAccount":8,"platformSection":1},{"shortId":"m00057","platform":1,"platformAccount":8,"platformSection":2},{"shortId":"m00058","platform":1,"platformAccount":8,"platformSection":3},{"shortId":"m00059","platform":1,"platformAccount":8,"platformSection":4},{"shortId":"m00060","platform":1,"platformAccount":8,"platformSection":5},{"shortId":"m00061","platform":7,"platformAccount":15,"platformSection":13},{"shortId":"m00062","platform":7,"platformAccount":16,"platformSection":13},{"shortId":"m00063","platform":9,"platformAccount":0,"platformSection":14},{"shortId":"m00064","platform":9,"platformAccount":0,"platformSection":15},{"shortId":"m00065","platform":8,"platformAccount":17,"platformSection":0},{"shortId":"m00066","platform":8,"platformAccount":18,"platformSection":0},{"shortId":"m00067","platform":13,"platformAccount":0,"platformSection":0},{"shortId":"m00068","platform":3,"platformAccount":11,"platformSection":18},{"shortId":"m00069","platform":3,"platformAccount":12,"platformSection":18},{"shortId":"m00070","platform":14,"platformAccount":0,"platformSection":19},{"shortId":"m00071","platform":14,"platformAccount":0,"platformSection":20},{"shortId":"m00072","platform":9,"platformAccount":0,"platformSection":21},{"shortId":"m00073","platform":1,"platformAccount":1,"platformSection":22},{"shortId":"m00074","platform":1,"platformAccount":2,"platformSection":22},{"shortId":"m00075","platform":15,"platformAccount":0,"platformSection":0}]}' + +# Load JSON data +data = json.loads(json_data) + +# Extract the list of dictionaries +data_list = data['list'] + +# Mapping of platform values +platform_mapping = { + 1: 
"MKTPlatformYoutube", + 2: "MKTPlatformFacebook", + 3: "MKTPlatformInstagram", + 4: "MKTPlatformWhatsapp", + 5: "MKTPlatformEmail", + 6: "MKTPlatformTelegram", + 7: "MKTPlatformTwitter", + 8: "MKTPlatformKoo", + 9: "MKTPlatformApp", + 10: "MKTPlatformLinkedIn", + 11: "MKTPlatformGoogleAds", + 12: "MKTPlatformQuora", + 13: "MKTPlatformFacebookAds", + 14: "MKTPlatformWebsite", + 15: "MKTPlatformWhatsappChannel", +} + +platform_account_mapping = { + 1: "MKTPlatformAccountYTMainHindi", + 2: "MKTPlatformAccountYTMainEnglish", + 3: "MKTPlatformAccountYTSadho", + 4: "MKTPlatformAccountYTShastraGyan", + 5: "MKTPlatformAccountYTYuvaMitra", + 6: "MKTPlatformAccountYTFreshBlades", + 7: "MKTPlatformAccountYTSaintsAndScriptures", + 8: "MKTPlatformAccountYTNotEvenOne", + 9: "MKTPlatformAccountFBMainHindi", + 10: "MKTPlatformAccountFBMainEnglish", + 11: "MKTPlatformAccountInstaMainHindi", + 12: "MKTPlatformAccountInstaMainEnglish", + 13: "MKTPlatformAccountWhatsAppOutreach", + 14: "MKTPlatformAccountWhatsAppRP", + 15: "MKTPlatformAccountTwitterHindi", + 16: "MKTPlatformAccountTwitterEnglish", + 17: "MKTPlatformAccountKooHindi", + 18: "MKTPlatformAccountKooEnglish", + 19: "MKTPlatformAccountLinkedInAPProfile", + 20: "MKTPlatformAccountLinkedInPAFPage", +} + +platform_section_mapping = { + 1 :"MKTPlatformSectionYTDescription", + 2 :"MKTPlatformSectionYTPinnedComment", + 3 :"MKTPlatformSectionYTCard", + 4 :"MKTPlatformSectionYTEndSlide", + 5 :"MKTPlatformSectionYTCommunity", + 6 :"MKTPlatformSectionFBPage", + 7 :"DeprecatedMKTPlatformSectionFBAd", + 8 :"MKTPlatformSectionFBStories", + 9 :"MKTPlatformSectionInstaStories", + 10:"MKTPlatformSectionInstaProfileLink", + 11:"MKTPlatformSectionWhatsappBroadcast", + 12:"MKTPlatformSectionWhatsappStories", + 13:"MKTPlatformSectionTwitterFeed", + 14:"MKTPlatformSectionAppWisdomFeedH", + 15:"MKTPlatformSectionAppWisdomFeedE", + 16:"MKTPlatformSectionLinkedInPage", + 17:"MKTPlatformSectionWebsiteGrace", + 18:"MKTPlatformSectionInstaBroadcast", + 19:"MKTPlatformSectionWebsiteArticles", + 20:"MKTPlatformSectionWebsiteHome", + 21:"MKTPlatformSectionAppGitaFeed", + 22:"MKTPlatformSectionYTCommentReply" +} + +# Replace "platform" values +for row in data_list: + row['platformSection'] = platform_section_mapping.get(row['platformSection'], row['platformSection']) + row['platformAccount'] = platform_account_mapping.get(row['platformAccount'], row['platformAccount']) + row['platform'] = platform_mapping.get(row['platform'], row['platform']) + +# Specify the CSV file path +csv_file_path = 'output.csv' + +# Write to CSV +with open(csv_file_path, 'w', newline='') as csvfile: + fieldnames = data_list[0].keys() + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + + # Write header + writer.writeheader() + + # Write rows + for row in data_list: + writer.writerow(row) + +print(f'CSV file "{csv_file_path}" has been created.') diff --git a/data-transform/mergeTxtFiles.py b/data-transform/mergeTxtFiles.py new file mode 100644 index 0000000..5f73486 --- /dev/null +++ b/data-transform/mergeTxtFiles.py @@ -0,0 +1,36 @@ +import argparse +import os +import re + + +def time_to_seconds(time_str): + h, m, s = map(int, time_str.split(':')) + return h * 3600 + m * 60 + s + +def merge_txt_files(folder_path): + output_file = os.path.join(folder_path, 'output.txt') + + with open(output_file, 'w') as outfile: + # Sort files to ensure consistent ordering + files = sorted([f for f in os.listdir(folder_path) if f.endswith(".txt") and f != 'output.txt']) + + for index, filename in enumerate(files, 
start=1): + file_path = os.path.join(folder_path, filename) + with open(file_path, 'r') as infile: + for line in infile: + # Extract timestamp + match = re.match(r'\[(\d{2}:\d{2}:\d{2})\](.*)', line) + if match: + time_str, content = match.groups() + seconds = time_to_seconds(time_str) + # Write the modified line + outfile.write(f"{index}-[{seconds}]{content.strip()}\n") + + print(f"All text files merged into {output_file}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Merge all text files in the folder into output.txt") + parser.add_argument('folder_path', type=str, help='Path to the folder containing the text files') + + args = parser.parse_args() + merge_txt_files(args.folder_path) \ No newline at end of file diff --git a/pointInPolygon.py b/data-transform/pointInPolygon.py similarity index 100% rename from pointInPolygon.py rename to data-transform/pointInPolygon.py diff --git a/download/bing_downloader.py b/download/bing_downloader.py new file mode 100644 index 0000000..3d03317 --- /dev/null +++ b/download/bing_downloader.py @@ -0,0 +1,151 @@ +""" +Bing Image Downloader for Celebrity Face Portraits + +This script downloads face portrait images of specified celebrities using the Bing Image Downloader Ext (forked version with more features). +It creates a structured dataset with separate folders for each person. + +Requirements: + - Python 3.12+ + - bing-image-downloader-ext package + +Installation: + pip install bing-image-downloader-ext + +Usage: + # Download with default personalities and count + python bing_downloader.py + + # Download specific personalities + python bing_downloader.py -p "Elon Musk, Barack Obama, Bill Gates" + + # Download with custom count + python bing_downloader.py -c 10 + + # Custom output directory + python bing_downloader.py -o "my_dataset" + + # Combine all options + python bing_downloader.py -p "Elon Musk, Barack Obama" -c 8 -o "celebrity_images" + +Arguments: + -p, --personality: Comma-separated list of personalities to download (optional) + -c, --count: Number of images to download per person (default: 5) + -o, --output: Output directory for dataset (default: "dataset") + +Output Structure: + dataset/ + ├── Elon_Musk/ + │ └── Elon Musk face portrait photo/ + │ ├── Image_0001.jpg + │ ├── Image_0002.jpg + │ └── ... + ├── Donald_Trump/ + │ └── Donald Trump face portrait photo/ + │ ├── Image_0001.jpg + │ └── ... + └── ... + +Default Personalities: + - Elon Musk, Donald Trump, Narendra Modi, Tom Hanks, Tom Cruise +""" + +import argparse +from bing_image_downloader import downloader + + +class BingImageDownloader: + """Class for downloading celebrity face portrait images from Bing.""" + + def __init__(self): + self.default_people = ["Elon Musk", "Donald Trump", "Narendra Modi", "Tom Hanks", "Tom Cruise"] + self.timeout = 60 + self.adult_filter_off = True + self.force_replace = False + self.verbose = True + + def download_images(self, people, count=5, output_dir="dataset"): + """ + Download face portrait images for specified people. 
+ + Args: + people (list): List of personality names to download images for + count (int): Number of images to download per person (default: 5) + output_dir (str): Base output directory for the dataset (default: "dataset") + """ + for person in people: + print(f"Downloading {count} images for: {person}") + try: + downloader.download( + person + " face portrait photo", + limit=count, + output_dir=f"{output_dir}/{person.replace(' ', '_')}", + adult_filter_off=self.adult_filter_off, + force_replace=self.force_replace, + timeout=self.timeout, + verbose=self.verbose + ) + print(f"✓ Successfully downloaded images for {person}") + except Exception as e: + print(f"✗ Error downloading images for {person}: {str(e)}") + + +def main(): + """Main function to handle command-line arguments and execute downloads.""" + parser = argparse.ArgumentParser( + description="Download celebrity face portrait images from Bing", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python bing_downloader.py + python bing_downloader.py -p "Elon Musk, Barack Obama" + python bing_downloader.py -c 10 + python bing_downloader.py -o "my_dataset" + python bing_downloader.py -p "Bill Gates" -c 8 -o "celebrity_images" + """ + ) + + parser.add_argument( + "-p", "--personality", + type=str, + help="Comma-separated list of personalities to download (e.g., 'Elon Musk, Barack Obama')" + ) + + parser.add_argument( + "-c", "--count", + type=int, + default=5, + help="Number of images to download per person (default: 5)" + ) + + parser.add_argument( + "-o", "--output", + type=str, + default="dataset", + help="Output directory for dataset (default: 'dataset')" + ) + + args = parser.parse_args() + + # Initialize the downloader + image_downloader = BingImageDownloader() + + # Parse personalities + if args.personality: + people = [name.strip() for name in args.personality.split(",")] + else: + people = image_downloader.default_people + + print(f"Starting download for {len(people)} personalities with {args.count} images each...") + print(f"Personalities: {', '.join(people)}") + print(f"Output directory: {args.output}") + print("-" * 50) + + # Download images + image_downloader.download_images(people, args.count, args.output) + + print("-" * 50) + print("Download process completed!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/download/downloadBuffettLetters.py b/download/downloadBuffettLetters.py new file mode 100644 index 0000000..bd62731 --- /dev/null +++ b/download/downloadBuffettLetters.py @@ -0,0 +1,38 @@ +import requests +from bs4 import BeautifulSoup + +# List of years to scrape +years = list(range(1977, 1998)) + +# Initialize an empty list to store the text content +all_text = [] + +# Loop through each year and download the text +for year in years: + url = f"https://www.berkshirehathaway.com/letters/%7Byear%7D.html" + print(f"Downloading text for {year}...") + + try: + response = requests.get(url) + + if response.status_code == 200: + soup = BeautifulSoup(response.content, "html.parser") + + # Extract the text content from the webpage + text = soup.get_text() + all_text.append(text) + print(f"Successfully downloaded text for {year}") + else: + print(f"Error fetching data for {year}") + except requests.exceptions.RequestException as e: + print(f"Error downloading text for {year}: {e}") + continue + +# Combine all the text content into a single string +combined_text = "\n\n".join(all_text) + +# Write the combined text to a file +with open("website_text.txt", 
"w", encoding="utf-8") as file: + file.write(combined_text) + +print("Text content saved to 'website_text.txt'.") \ No newline at end of file diff --git a/downloadYoutube.py b/download/downloadYoutube.py similarity index 56% rename from downloadYoutube.py rename to download/downloadYoutube.py index 6136a8b..2a97dd4 100755 --- a/downloadYoutube.py +++ b/download/downloadYoutube.py @@ -9,4 +9,6 @@ youtube_link = args["link"] -YouTube(youtube_link).streams.first().download() \ No newline at end of file +# YouTube(youtube_link).streams.first().download() +yt = YouTube(youtube_link) +yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').asc().first().download() \ No newline at end of file diff --git a/whatsapp.py b/download/whatsapp.py similarity index 100% rename from whatsapp.py rename to download/whatsapp.py diff --git a/environment.yml b/environment.yml index 8fb24b6..0a5123c 100644 --- a/environment.yml +++ b/environment.yml @@ -1,66 +1,68 @@ name: python-utils channels: + - conda-forge - defaults dependencies: - - ca-certificates=2020.7.22=0 - - certifi=2020.6.20=py38_0 - - libcxx=10.0.0=1 - - libedit=3.1.20191231=h1de35cc_1 - - libffi=3.3=hb1e8313_2 - - ncurses=6.2=h0a44026_1 - - openssl=1.1.1h=haf1e3a3_0 - - pip=20.2.2=py38_0 - - python=3.8.5=h26836e1_1 - - readline=8.0=h1de35cc_0 - - setuptools=49.6.0=py38_0 - - sqlite=3.33.0=hffcf06c_0 - - tk=8.6.10=hb0a8c7a_0 - - wheel=0.35.1=py_0 - - xz=5.2.5=h1de35cc_0 - - zlib=1.2.11=h1de35cc_3 + - bzip2=1.0.8=h99b78c6_7 + - ca-certificates=2024.7.4=hf0a4a13_0 + - libexpat=2.6.2=hebf3989_0 + - libffi=3.4.2=h3422bc3_5 + - libsqlite=3.46.0=hfb93653_0 + - libzlib=1.3.1=hfb2fe0b_1 + - ncurses=6.5=hb89a1cb_0 + - openssl=3.3.1=hfb2fe0b_2 + - pip=24.2=pyhd8ed1ab_0 + - python=3.11.9=h932a869_0_cpython + - readline=8.2=h92ec313_1 + - setuptools=72.1.0=pyhd8ed1ab_0 + - tk=8.6.13=h5083fa2_1 + - tzdata=2024a=h0c530f3_0 + - wheel=0.44.0=pyhd8ed1ab_0 + - xz=5.2.6=h57fd34a_0 - pip: - - appdirs==1.4.4 - - audioread==2.1.9 - - audiotsm==0.1.2 - - cffi==1.14.4 - - chardet==4.0.0 - - decorator==4.4.2 - - ffmpeg-python==0.2.0 - - fire==0.3.1 - - future==0.18.2 - - idna==2.10 - - imageio==2.9.0 - - imageio-ffmpeg==0.4.3 - - iso8601==0.1.13 - - joblib==1.0.0 - - librosa==0.8.0 - - llvmlite==0.35.0 - - lxml==4.6.2 - - m3u8==0.8.0 - - moviepy==1.0.3 - - numba==0.52.0 - - numpy==1.19.4 - - packaging==20.8 - - pikepdf==2.2.2 - - pillow==8.0.1 - - pooch==1.3.0 - - proglog==0.1.9 - - psutil==5.8.0 - - pycparser==2.20 - - pyparsing==2.4.7 - - pypdf2==1.26.0 - - pytube==10.4.1 - - requests==2.25.1 - - resampy==0.2.2 - - scikit-learn==0.24.0 - - scipy==1.5.4 - - selenium==3.141.0 - - six==1.15.0 - - soundfile==0.10.3.post1 - - termcolor==1.1.0 - - threadpoolctl==2.1.0 - - tqdm==4.56.0 - - typing-extensions==3.7.4.3 - - urllib3==1.26.2 - - whatsapp-web==0.0.1 - + - attrs==24.1.0 + - beautifulsoup4==4.12.3 + - blinker==1.8.2 + - cachetools==5.5.0 + - certifi==2024.7.4 + - chardet==5.2.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - flask==3.0.3 + - fpdf==1.7.2 + - google-api-core==2.20.0 + - google-api-python-client==2.146.0 + - google-auth==2.35.0 + - google-auth-httplib2==0.2.0 + - googleapis-common-protos==1.65.0 + - h11==0.14.0 + - httplib2==0.22.0 + - idna==3.7 + - itsdangerous==2.2.0 + - jinja2==3.1.4 + - markupsafe==2.1.5 + - networkx==3.3 + - outcome==1.3.0.post0 + - pillow==10.4.0 + - proto-plus==1.24.0 + - protobuf==5.28.2 + - pyasn1==0.6.1 + - pyasn1-modules==0.4.1 + - pyparsing==3.1.4 + - pysocks==1.7.1 + - reportlab==4.2.2 + - 
requests==2.32.3
+  - rsa==4.9
+  - selenium==4.23.1
+  - sniffio==1.3.1
+  - sortedcontainers==2.4.0
+  - soupsieve==2.5
+  - trio==0.26.1
+  - trio-websocket==0.11.1
+  - typing-extensions==4.12.2
+  - uritemplate==4.1.1
+  - urllib3==2.2.2
+  - websocket-client==1.8.0
+  - werkzeug==3.0.4
+  - wsproto==1.2.0
 prefix: /Users/anshul/anaconda3/envs/python-utils
diff --git a/raspi/gpio.py b/raspi/gpio.py
new file mode 100644
index 0000000..471f457
--- /dev/null
+++ b/raspi/gpio.py
@@ -0,0 +1,35 @@
+import RPi.GPIO as GPIO
+import argparse
+from time import sleep
+
+def main(period):
+
+    # Assign pins (BOARD numbering)
+    blower_pin_1 = 40
+    blower_pin_2 = 38
+    feeder_pin_1 = 37
+    # Pins 37, 38 and 40 drive the relays, so they are excluded from the unused list
+    unused_board_pins = (3, 5, 7, 8, 10, 11, 12, 13, 15, 16, 18, 19, 21, 22, 23, 24, 26, 29, 31, 32, 33, 35, 36)
+    relay_pins = (blower_pin_1, blower_pin_2, feeder_pin_1)
+
+    # Setup GPIO
+    GPIO.setmode(GPIO.BOARD)
+    GPIO.setwarnings(False)
+    GPIO.setup(unused_board_pins, GPIO.OUT, initial=True)
+    GPIO.setup(relay_pins, GPIO.OUT, initial=True)
+
+    # Turn on pins (the relays switch on a low output)
+    GPIO.output(blower_pin_1, False)
+    GPIO.output(blower_pin_2, False)
+
+    sleep(int(period))
+
+    # Turn off pins
+    GPIO.output(blower_pin_1, True)
+    GPIO.output(blower_pin_2, True)
+
+if __name__=="__main__":
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-p", "--period", required=True, type=str, help="Time in seconds to keep the pin on")
+    args = vars(ap.parse_args())
+    period = args["period"]
+    main(period)
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 8b013d1..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-appdirs==1.4.4
-audioread==2.1.9
-audiotsm==0.1.2
-certifi==2020.6.20
-cffi==1.14.4
-chardet==4.0.0
-decorator==4.4.2
-ffmpeg-python==0.2.0
-fire==0.3.1
-future==0.18.2
-idna==2.10
-imageio==2.9.0
-imageio-ffmpeg==0.4.3
-iso8601==0.1.13
-joblib==1.0.0
-librosa==0.8.0
-llvmlite==0.35.0
-lxml==4.6.2
-m3u8==0.8.0
-moviepy==1.0.3
-numba==0.52.0
-numpy==1.19.4
-packaging==20.8
-pikepdf==2.2.2
-Pillow==8.0.1
-pooch==1.3.0
-proglog==0.1.9
-psutil==5.8.0
-pycparser==2.20
-pyparsing==2.4.7
-PyPDF2==1.26.0
-pytube==10.4.1
-requests==2.25.1
-resampy==0.2.2
-scikit-learn==0.24.0
-scipy==1.5.4
-selenium==3.141.0
-six==1.15.0
-SoundFile==0.10.3.post1
-termcolor==1.1.0
-threadpoolctl==2.1.0
-tqdm==4.56.0
-typing-extensions==3.7.4.3
-urllib3==1.26.2
-whatsapp-web==0.0.1
diff --git a/sendGrid.py b/sendGrid.py
new file mode 100644
index 0000000..868a231
--- /dev/null
+++ b/sendGrid.py
@@ -0,0 +1,20 @@
+# using SendGrid's Python Library
+# https://github.com/sendgrid/sendgrid-python
+import os
+
+from sendgrid import SendGridAPIClient
+from sendgrid.helpers.mail import Mail
+
+message = Mail(
+    from_email='automate@innocule.co.in',
+    to_emails='jeeban.puhan@epsumlabs.in',
+    subject='Sending with Twilio SendGrid is Fun',
+    html_content='<strong>and easy to do anywhere, even with Python</strong>')
+try:
+    sg = SendGridAPIClient(os.environ.get('SENDGRID_API_KEY'))
+    response = sg.send(message)
+    print(response.status_code)
+    print(response.body)
+    print(response.headers)
+except Exception as e:
+    # Exception objects have no .message attribute in Python 3; print the exception itself
+    print(e)
diff --git a/snap7/README.md b/snap7/README.md
new file mode 100644
index 0000000..12a615a
--- /dev/null
+++ b/snap7/README.md
@@ -0,0 +1,97 @@
+# Siemens S7 PLC Communication with Snap7
+
+This directory contains a collection of Python scripts designed to interact with Siemens S7 PLCs (Programmable Logic Controllers) using the `python-snap7` library.
These scripts provide examples for various operations such as reading from and writing to PLC memory, testing connections, and more. + +## Scripts Overview + +Below is a description of each script found in this directory: + +### `memory.py` + +This script defines a Python class `S71200` which simplifies interaction with an S7-1200 PLC. + +* **Purpose**: To provide a higher-level abstraction for reading and writing various data types to different memory areas of an S7-1200 PLC. +* **Class**: `S71200(ip, debug=False)` + * `ip` (str): The IP address of the PLC. + * `debug` (bool, optional): Enables debug printing if set to `True`. +* **Methods**: + * `getMem(mem, returnByte=False)`: Reads data from a specified memory location. + * `mem` (str): The memory address (e.g., 'MX0.0', 'MB0', 'MW0', 'MD0', 'FREAL0', 'QX0.0', 'IB0'). + * `returnByte` (bool, optional): If `True`, returns the raw byte array; otherwise, returns the parsed data type. + * `writeMem(mem, value)`: Writes data to a specified memory location. + * `mem` (str): The memory address. + * `value`: The value to write (appropriate for the specified memory type). +* **Supported Memory Areas**: + * `M` (Merkers / Memory Bits) + * `Q` (Outputs) + * `I` (Inputs) +* **Supported Data Types**: Boolean (bit), Byte (int), Word (int), DWord (int/dword), Real (float). +* **Example**: The script includes a commented-out example section (`if __name__=="__main__":`) demonstrating how to use the `S71200` class to read and write to PLC memory. + +### `snap7-PLC-test.py` + +This script provides functions to read and write to the Merker (MK) memory area of a PLC and includes a test case. + +* **Purpose**: To test basic read and write operations on a PLC's MK memory area. +* **Functions**: + * `ReadMemory(plc, byte, bit, datatype)`: Reads data from a specific address in the MK memory area. + * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data to a specific address in the MK memory area. +* **Example**: The `if __name__=="__main__":` block connects to a PLC, reads a bit from memory address MK100.2, attempts to write to it, and prints the status. + +### `snap7-client.py` + +A simple client script to read real values from different memory areas of a PLC. + +* **Purpose**: To demonstrate connecting to a PLC and reading floating-point numbers from the MK (Merker), PA (Process Image Output), and PE (Process Image Input) memory areas. +* **Operation**: Connects, reads a 4-byte real value starting from address 0 in areas 0x83 (MK), 0x82 (PA), and 0x81 (PE), prints these values, and disconnects. + +### `snap7-read-write.py` + +This script demonstrates reading and writing a real (floating-point) value to the MK memory area of a PLC. + +* **Purpose**: To provide a clear example of reading and modifying a real value in the PLC's memory. +* **Functions**: + * `ReadMemory(plc, byte, bit, datatype)`: Reads data (focused on MK area due to `areas['MK']`). + * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data (focused on MK area). +* **Example**: The script connects to a PLC, reads a real value from address 0 of the MK area, writes the value of Pi (3.141592) to the same location, and then reads it back to verify the write. + +### `snap7-test-connection.py` + +A very basic script to test the network connection to a PLC. + +* **Purpose**: To quickly verify if a connection can be established with the PLC at the specified IP address. +* **Operation**: Attempts to connect to the PLC, prints whether the connection was successful, and then disconnects. 
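+
+All of these scripts share the same few-line core. As a minimal sketch of that pattern (the IP address is a placeholder; rack 0 and slot 1 match the values used throughout these scripts):
+
+```python
+import snap7
+
+plc = snap7.client.Client()
+plc.connect('192.168.1.10', 0, 1)  # IP address, rack, slot
+print('Connected:', plc.get_connected())
+plc.disconnect()
+```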
+
+### `snap7-write-input.py`
+
+This script demonstrates reading from and writing to the Process Image Input (PE) area of a PLC. Writing to inputs is typically used for simulation or forcing values.
+
+* **Purpose**: To show how to interact with the PLC's input image table.
+* **Functions**:
+    * `ReadMemory(plc, byte, bit, datatype)`: Reads data from the PE memory area (`areas['PE']`).
+    * `WriteMemory(plc, byte, bit, datatype, value)`: Writes data to the PE memory area.
+* **Example**: The script connects to a PLC, reads a bit from input address 0.0 (e.g., I0.0), writes a '1' to it, and then reads it back to show the change in the process image.
+
+## Prerequisites
+
+* **Python 3.x**
+* **`python-snap7` library**: This library is essential for communication with the S7 PLCs. It can be installed via pip:
+    ```bash
+    pip install python-snap7
+    ```
+* **Siemens S7 PLC**: You need access to a compatible Siemens S7 PLC (e.g., S7-1200, S7-1500, S7-300, S7-400) configured with an IP address on the same network as the machine running these scripts. The PLC should also be configured to allow PUT/GET communication from external partners (often a setting in the PLC's hardware configuration or protection settings).
+
+## Usage
+
+1. **Configure IP Addresses**: Before running any script, you will likely need to modify the PLC's IP address within the script. Most scripts have a line like `plc.connect('192.168.1.X', 0, 1)` where `'192.168.1.X'` should be replaced with your PLC's actual IP address.
+2. **Run from Command Line**: Open a terminal or command prompt, navigate to this `snap7` directory, and execute the desired script using Python:
+    ```bash
+    python <script_name>.py
+    ```
+    For example:
+    ```bash
+    python snap7-test-connection.py
+    ```
+3. **Observe Output**: The scripts will print information to the console regarding their operations, such as connection status, data read, or confirmation of data written.
+
+**Note**: Ensure your network configuration allows communication between your computer and the PLC on the necessary ports (typically TCP port 102 for S7 communication).
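+
+## Quick Example
+
+A minimal sketch of using the `S71200` wrapper from `memory.py` described above. The import path, IP address, and memory address are illustrative placeholders, and it assumes the class connects to the PLC on construction, as its example section suggests:
+
+```python
+from memory import S71200
+
+plc = S71200('192.168.1.10', debug=True)  # connect to the PLC at this IP
+flag = plc.getMem('MX0.0')                # read memory bit M0.0 as a boolean
+plc.writeMem('MX0.0', not flag)           # toggle the bit
+print('M0.0 is now', plc.getMem('MX0.0'))
+```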
diff --git a/video/audio2video.py b/video/audio2video.py
new file mode 100644
index 0000000..7d53891
--- /dev/null
+++ b/video/audio2video.py
@@ -0,0 +1,37 @@
+from moviepy.editor import *
+from os import listdir
+from os.path import isfile, join
+import os  # os.path.splitext is used below, but os itself was never imported
+
+def convert(inDir, imagePath, outDir):
+
+    onlyfiles = [f for f in listdir(inDir) if isfile(join(inDir, f))]
+
+    for mp3File in onlyfiles:
+        if mp3File.endswith(".mp3"):
+            filePath = inDir + mp3File
+            audio = AudioFileClip(filePath)
+            clip = ImageClip(imagePath).set_duration(audio.duration)
+            fileName = outDir + os.path.splitext(mp3File)[0] + '.mp4'
+            clip = clip.set_audio(audio)
+            clip.write_videofile(fileName, fps=0.1)
+
+def joinVideo(inputdirectory):
+
+    onlyfiles = [f for f in listdir(inputdirectory) if isfile(join(inputdirectory, f))]
+
+    clips = []
+    for videofile in onlyfiles:
+        if videofile.endswith(".mp4"):
+            filepath = inputdirectory + videofile
+            clip = VideoFileClip(filepath)
+            clips.append(clip)
+
+    final_clip = concatenate_videoclips(clips)
+    final_clip.write_videofile(inputdirectory + 'joined-video.mp4')
+
+if __name__=="__main__":
+    inDir = r"C:\Users\anshul\Documents\\"
+    imagePath = r"C:\Users\anshul\Documents\myimage.jpg"
+    outDir = r"C:\Users\anshul\Documents\\"
+    convert(inDir, imagePath, outDir)
+    joinVideo(outDir)
diff --git a/checkFPS.py b/video/checkFPS.py
similarity index 100%
rename from checkFPS.py
rename to video/checkFPS.py
diff --git a/extractFrames.py b/video/extractFrames.py
similarity index 100%
rename from extractFrames.py
rename to video/extractFrames.py
diff --git a/video/ipcam.py b/video/ipcam.py
new file mode 100644
index 0000000..d7c4c37
--- /dev/null
+++ b/video/ipcam.py
@@ -0,0 +1,54 @@
+import cv2, base64, requests, json, os, re, argparse, threading, logging
+from logging.handlers import RotatingFileHandler
+from os.path import expanduser
+
+import numpy as np
+from time import sleep
+
+formatter = logging.Formatter("[%(levelname)s] (%(asctime)s) {%(filename)s:%(lineno)d} | %(name)s | %(message)s")
+log_file_path = expanduser("~") + '/logs/ipcam.log'
+handler = RotatingFileHandler(log_file_path, maxBytes=1000000, backupCount=2)
+handler.setLevel(logging.DEBUG)
+handler.setFormatter(formatter)
+logger = logging.getLogger('IPCam')
+logger.addHandler(handler)
+logger.setLevel(logging.DEBUG)
+
+
+#url = 'http://localhost:8888/image/base64/'
+url = 'http://one.innocule.tech/image/base64/'
+
+def main(delay):
+    ipcamThread01 = threading.Thread(target=ipcam_1, args=("rtsp://admin:innocule7@192.168.1.64", "cam_01", delay))
+    ipcamThread02 = threading.Thread(target=ipcam_1, args=("rtsp://admin:innocule7@192.168.1.64", "cam_02", delay))
+    ipcamThread01.start()
+    ipcamThread02.start()
+
+def ipcam_1(camUrl, camName, delay):
+    logger.debug('Thread started for ' + camName)
+    cap = cv2.VideoCapture(camUrl)
+    while True:
+        # Reopen the capture on every iteration so a dropped RTSP stream reconnects
+        cap = cv2.VideoCapture(camUrl)
+        retval, image = cap.read()
+        _, im_arr = cv2.imencode('.jpg', image)
+        # ndarray.tostring() is deprecated in NumPy; tobytes() is the equivalent
+        jpg_as_text = base64.b64encode(np.array(im_arr).tobytes()).decode('utf-8')
+        #print(jpg_as_text[0:50])
+        data = {'image': str(jpg_as_text), 'source': camName}
+        headers = {'Content-type': 'application/json', 'Authorization': 'Basic YWRtaW46SW5ub2NAMTIz'}
+        try:
+            response = requests.post(url, json=data, headers=headers)
+            logger.debug('Response from cloud ' + response.text + ' for cam: ' + camName)
+        except:
+            logger.error("Error in sending request for cam: " + camName, exc_info=1)
+        sleep(int(delay))
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+if __name__=="__main__":
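+    # Entry point: read the per-frame delay from the CLI, then start one upload thread per camera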
+    logger.debug('='*5 + " Starting Application " + '='*5)
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-d", "--framedelay", required=True, type=str, help="Take frame every these many seconds")
+    args = vars(ap.parse_args())
+    delay = args["framedelay"]
+    main(delay)
\ No newline at end of file
diff --git a/joinSplitVideo.py b/video/joinSplitVideo.py
similarity index 89%
rename from joinSplitVideo.py
rename to video/joinSplitVideo.py
index 2683528..d579da2 100755
--- a/joinSplitVideo.py
+++ b/video/joinSplitVideo.py
@@ -5,7 +5,7 @@ def main():
     inputFile = "/Users/anshul/Downloads/output/to-cut.mp4"
     outputLocation = "/Users/anshul/Downloads/output/"
 
-    split(inputFile, outputLocation)
+    # split(inputFile, outputLocation)
     join()
 
@@ -17,11 +17,11 @@ def split(vidfile, output):
 
 def join():
     folder_prefix = '/Users/anshul/Downloads/output/'
-    numbers = re.compile(r'(\d+)')
+    # numbers = re.compile(r'(\d+)')
 
     # this two lines are for loading the videos. In this case the video are named as: cut1.mp4, cut2.mp4, ..., cut15.mp4
     videofiles = [n for n in os.listdir(folder_prefix) if n[0]=='o' and n[-4:]=='.avi']
-    videofiles = sorted(videofiles, key=numericalSort)
+    # videofiles = sorted(videofiles, key=numericalSort)
     print(videofiles)
 
     video_index = 0
@@ -33,7 +33,7 @@ def join():
     final_clip = concatenate_videoclips(clips)
     final_clip.write_videofile(folder_prefix+'joined-video.mp4')
 
-    print "end."
+    print("end.")
 
 def numericalSort(value):
diff --git a/video/joinVideos.py b/video/joinVideos.py
new file mode 100644
index 0000000..4effb0d
--- /dev/null
+++ b/video/joinVideos.py
@@ -0,0 +1,12 @@
+from moviepy.editor import *
+
+# getting subclip as video is large
+clip1 = VideoFileClip("/Users/anshul/Downloads/fal-aur-bandariya.mp4")
+# clip2 = VideoFileClip("/Users/anshul/Downloads/fal-aur-bandariya.mp4")
+# clip3 = VideoFileClip("/Users/anshul/Downloads/PAF_new_web_arch_training_part-3.mp4")
+# clip4 = VideoFileClip("/Users/anshul/Downloads/output/large-4k-video-4.mp4")
+
+# concatenating both the clips
+final = concatenate_videoclips([clip1, clip1])
+# writing the combined video to a file
+final.write_videofile("/Users/anshul/Downloads/fal-aur-bandariya-long.mp4")
\ No newline at end of file
diff --git a/removeSilenceFromVideo.py b/video/removeSilenceFromVideo.py
similarity index 77%
rename from removeSilenceFromVideo.py
rename to video/removeSilenceFromVideo.py
index ff4cc9c..b9b65f6 100644
--- a/removeSilenceFromVideo.py
+++ b/video/removeSilenceFromVideo.py
@@ -3,4 +3,4 @@
 
 video = Video()
 
-video.humanly_remove_silence_parts_from_video("/ap.mp4", "/ap-silence.mp4", 25, minimum_interval=0.7)
\ No newline at end of file
+video.humanly_remove_silence_parts_from_video("/ap.mp4", "/ap-silence.mp4", 25, minimum_interval=0.7)
\ No newline at end of file
diff --git a/videoStreamingRaspi.py b/video/videoStreamingRaspi.py
similarity index 100%
rename from videoStreamingRaspi.py
rename to video/videoStreamingRaspi.py