# Provenance: recovered from the git patch "[PATCH] Initialization of project"
# (commit fd1f1c10fb113d79ef5057d897ea985c222989aa, author foglar,
# Fri, 8 Nov 2024 09:02:58 +0100), which created main.py.
"""Scrape profile data of an Instagram user through Instagram's web API.

Authenticated endpoints need session credentials.  They are never stored in
source code; pass them to ``Profile`` or export them as environment variables:

    INSTAGRAM_SESSION_ID, INSTAGRAM_CSRF_TOKEN, INSTAGRAM_DS_USER_ID
"""
import json
import logging
import os
from typing import Optional

import httpx

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

_PROFILE_INFO_URL = "https://i.instagram.com/api/v1/users/web_profile_info/"

# Headers shared by both HTTP clients.  "Accept-Encoding" is deliberately not
# overridden: httpx advertises only the encodings it is able to decode, whereas
# forcing "br" breaks response decoding when no brotli package is installed.
_BASE_HEADERS = {
    "x-ig-app-id": "936619743392459",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/62.0.3202.94 Safari/537.36"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "*/*",
}


def _auth_headers(
    session_id: Optional[str],
    csrf_token: Optional[str],
    ds_user_id: Optional[str],
) -> dict:
    """Build the Cookie / CSRF headers from whichever credentials are set."""
    cookie_parts = {
        "sessionid": session_id,
        "csrftoken": csrf_token,
        "ds_user_id": ds_user_id,
    }
    cookie = "; ".join(f"{key}={value}" for key, value in cookie_parts.items() if value)
    headers = {}
    if cookie:
        headers["Cookie"] = cookie
    if csrf_token:
        headers["X-CSRFToken"] = csrf_token
    return headers


class Profile:
    """Profile data of one Instagram user, fetched once at construction time.

    The instance owns two ``httpx.Client`` objects; release them with
    ``close()`` or by using the instance as a context manager.
    """

    def __init__(
        self,
        username: str,
        *,
        session_id: Optional[str] = None,
        csrf_token: Optional[str] = None,
        ds_user_id: Optional[str] = None,
    ) -> None:
        """
        Initialize a Profile instance to scrape data from Instagram user.

        Args:
            username (str): The username of the Instagram profile to scrape.
            session_id: Value of the ``sessionid`` cookie. Defaults to the
                ``INSTAGRAM_SESSION_ID`` environment variable.
            csrf_token: Value of the ``csrftoken`` cookie, also sent as the
                ``X-CSRFToken`` header. Defaults to ``INSTAGRAM_CSRF_TOKEN``.
            ds_user_id: Value of the ``ds_user_id`` cookie. Defaults to
                ``INSTAGRAM_DS_USER_ID``.
        """
        self.username = username

        credentials = _auth_headers(
            session_id or os.environ.get("INSTAGRAM_SESSION_ID"),
            csrf_token or os.environ.get("INSTAGRAM_CSRF_TOKEN"),
            ds_user_id or os.environ.get("INSTAGRAM_DS_USER_ID"),
        )
        self._has_session = "Cookie" in credentials

        # Client used for endpoints that require a logged-in session.
        self.user_client = httpx.Client(headers={**_BASE_HEADERS, **credentials})
        # Client used for endpoints that are queried anonymously.
        self.user_client_no_cookies = httpx.Client(headers=dict(_BASE_HEADERS))

        self.user_profile_data = self.scrape_user_profile()
        self.user_id = self.get_user_id()
        logger.info("User identification number is: %s", self.user_id)
        if self.is_user_private():
            logger.info("User is private.")

    def __enter__(self) -> "Profile":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close both underlying HTTP clients."""
        self.user_client.close()
        self.user_client_no_cookies.close()

    def _user_record(self) -> dict:
        """Return the ``data.user`` object of the profile payload, or ``{}``.

        Every level is type-checked because a JSON ``null`` at either level
        would otherwise make a chained ``.get`` raise ``AttributeError``.
        """
        payload = self.user_profile_data
        data = payload.get("data") if isinstance(payload, dict) else None
        user = data.get("user") if isinstance(data, dict) else None
        return user if isinstance(user, dict) else {}

    @staticmethod
    def _as_bool(value) -> bool:
        """Interpret a JSON flag as a bool, tolerating string encodings."""
        if isinstance(value, str):
            return value.strip().lower() not in ("", "false")
        return bool(value)

    def scrape_user_profile(self) -> dict:
        """Scrape Instagram user's profile data and return it as a dictionary.

        Returns:
            dict: The decoded JSON payload, or ``{}`` when the request fails,
            the server answers with an error status, or the body is not a
            JSON object.
        """
        try:
            # ``params`` lets httpx URL-encode the username.
            response = self.user_client_no_cookies.get(
                _PROFILE_INFO_URL, params={"username": self.username}
            )
            response.raise_for_status()  # Ensure the request was successful
            payload = response.json()
        except httpx.HTTPError as e:
            # Covers both transport failures and error status codes.
            logger.error("Failed to retrieve profile data: %s", e)
            return {}
        except ValueError as e:
            logger.error("Failed to retrieve profile data: invalid JSON (%s)", e)
            return {}
        return payload if isinstance(payload, dict) else {}

    def get_user_id(self) -> str:
        """Extract and return the user ID from the profile data ("" if absent)."""
        return self._user_record().get("id", "")

    def is_user_private(self) -> bool:
        """Get information about if user is private or not

        Returns:
            bool: True if the profile is flagged private, False if it is
            public or the flag is missing from the profile data.
        """
        return self._as_bool(self._user_record().get("is_private", False))

    def is_user_verified(self) -> bool:
        """Get information about if user is verified or not

        Returns:
            bool: True if the profile is flagged verified, False otherwise or
            when the flag is missing from the profile data.
        """
        return self._as_bool(self._user_record().get("is_verified", False))

    def get_user_data(self, download=True) -> dict:
        """Return the raw user object of the profile payload.

        Args:
            download: When truthy, also write the data as JSON to
                ``user_data_<username>_<user_id>.json`` in the working
                directory.

        Returns:
            dict: The user object, or ``{}`` when no profile data is loaded.
        """
        user_data = self._user_record()
        if download:
            path = f"user_data_{self.username}_{self.user_id}.json"
            with open(path, "w", encoding="utf-8") as file:
                json.dump(user_data, file)
            logger.info("File saved successfully!")
        return user_data

    def Get_user_bio(self) -> str:
        """Get bio of user (also cached on ``self.user_bio``)."""
        self.user_bio = self._user_record().get("biography", "")
        return self.user_bio

    def Get_followers_count(self) -> int:
        """Retrieve and return the follower count from profile data."""
        edge = self._user_record().get("edge_followed_by")
        return edge.get("count", 0) if isinstance(edge, dict) else 0

    def Get_following_count(self) -> int:
        """Retrieve and return the following count from profile data."""
        edge = self._user_record().get("edge_follow")
        return edge.get("count", 0) if isinstance(edge, dict) else 0

    def Get_followers(self) -> dict:
        """Placeholder for scraping followers; not implemented yet.

        Returns:
            dict: Always ``{}`` so the result matches the annotated type.
        """
        logger.warning("Get_followers is not implemented yet.")
        return {}

    def Get_following(self) -> dict:
        """Scrape the list of users that the user is following and return their usernames and full names as a dictionary.

        A single request is made, so only the users contained in that one
        response are returned.

        Returns:
            dict: ``{username: full_name}``; ``{}`` when the user ID is
            unknown, the request fails, or the response is not usable.
        """
        if not self.user_id:
            logger.error("An error occurred: user ID is unknown")
            return {}
        if not self._has_session:
            logger.warning(
                "No session credentials configured; the request will be sent "
                "without cookies."
            )

        try:
            response = self.user_client.get(
                f"https://www.instagram.com/api/v1/friendships/{self.user_id}/following/"
            )
            response.raise_for_status()
            data = response.json()
        except (httpx.HTTPError, ValueError) as e:
            logger.error("An error occurred: %s", e)
            return {}
        logger.debug(data)

        users = (data.get("users") or []) if isinstance(data, dict) else []
        # Create a dictionary with the usernames as keys and full names as values
        return {
            user.get("username"): user.get("full_name")
            for user in users
            if isinstance(user, dict)
        }

    def Get_external_url(self):
        """Return the external URL listed on the profile ("" if absent)."""
        return self._user_record().get("external_url", "")

    def Get_profile_picture(self, hd=True, download=True):
        """Return the profile picture URL and optionally download the image.

        Args:
            hd: Use the ``profile_pic_url_hd`` field when truthy, otherwise
                ``profile_pic_url``.
            download: When truthy, save the image as
                ``profile_<username>_<user_id>.jpg`` in the working directory.

        Returns:
            The picture URL (also cached on ``self.profile_img``), or "" when
            the profile data does not contain one.
        """
        tag = "profile_pic_url_hd" if hd else "profile_pic_url"
        self.profile_img = self._user_record().get(tag, "")

        if download:
            self._download_profile_picture(self.profile_img)
        return self.profile_img

    def _download_profile_picture(self, url) -> None:
        """Fetch *url* and write it to the profile picture file; log failures."""
        if not url:
            logger.error("Failed to download image")
            return
        try:
            image = self.user_client.get(url)
        except httpx.HTTPError as e:
            logger.error("Failed to download image: %s", e)
            return
        if image.status_code != 200:
            logger.error("Failed to download image")
            return
        with open(f"profile_{self.username}_{self.user_id}.jpg", "wb") as file:
            file.write(image.content)
        logger.info("Image downloaded successfully!")


def main() -> None:
    """Demo run: dump profile data, picture and followed accounts of one user."""
    with Profile("ittrendy.cz") as profile:
        print(profile.Get_external_url())
        profile.get_user_data()
        profile.Get_profile_picture()
        following = profile.Get_following()

    for i, (user, full_name) in enumerate(following.items(), start=1):
        print(f'{i}. User: "{user}" \t\t Fullname: {full_name}')


if __name__ == "__main__":
    main()