New Page 3 | My Portfolio

Scraping Instagram Comments with caption:

def get_ig_comments(post_url, username, *, batch_size=100, pause_seconds=60):
    """Collect the comments of an Instagram post together with its caption.

    Args:
        post_url: URL of the post. The shortcode is taken as the last
            non-empty segment of the URL path, so a missing trailing slash
            or a query string does not affect the result.
        username: Instagram account whose saved instaloader session is
            loaded from ``~/.config/instaloader/session-<username>``.
        batch_size: Number of comments to fetch between pauses. A value
            of 0 or less disables pausing.
        pause_seconds: Length of each pause, in seconds.

    Returns:
        A list of ``(commenter_username, comment_text, caption)`` tuples,
        one per comment. ``caption`` is ``"No caption"`` when the post has
        an empty or missing caption.

    Raises:
        ValueError: If ``post_url`` has no path segment to use as shortcode.
    """
    import os
    from urllib.parse import urlsplit

    # Parse the path instead of indexing a raw "/"-split: the raw split
    # returns "p" rather than the shortcode when the trailing slash is absent.
    path_segments = [seg for seg in urlsplit(post_url).path.split("/") if seg]
    if not path_segments:
        raise ValueError(f"Cannot find a post shortcode in URL: {post_url!r}")
    shortcode = path_segments[-1]

    loader = instaloader.Instaloader()

    # Resolve the session file under the current user's home directory
    # rather than a fixed "/Users/..." path.
    session_file = os.path.expanduser(
        f"~/.config/instaloader/session-{username}"
    )
    loader.load_session_from_file(username, session_file)

    post = instaloader.Post.from_shortcode(loader.context, shortcode)
    caption = post.caption if post.caption else "No caption"

    comments_data = []
    for count, comment in enumerate(post.get_comments(), start=1):
        comments_data.append((comment.owner.username, comment.text, caption))
        # Pause after each full batch to slow down the request rate.
        if batch_size > 0 and count % batch_size == 0:
            time.sleep(pause_seconds)

    return comments_data

Results of this script:

Fetches the post caption, scrapes the comments while automatically pausing after every 100 comments to comply with Instagram’s API rate limits, and saves the extracted data in a structured CSV file for easy analysis.

View Full Code

Extracting Text from Images (OCR):

def process_image_for_description(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    The image is converted to grayscale, binarised with a fixed threshold
    of 150, and inverted before being handed to Tesseract.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The raw OCR output, or a fixed message string when the image
        cannot be loaded or the OCR output is blank.
    """
    source = cv2.imread(image_path)
    if source is None:
        return "Error: Unable to load image."

    # Preprocessing chain: grayscale -> binary threshold -> inversion.
    grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]
    ocr_input = cv2.bitwise_not(binary)

    text = pytesseract.image_to_string(ocr_input, config=r'--oem 3 --psm 6')

    # Whitespace-only output counts as "nothing found".
    if not text.strip():
        return "No readable text found in the image."
    return text

View Full Code

Converting Images to Text (Captioning) with BLIP:

def process_image_for_blip_description(image_path):
    """Generate a caption for the image using the BLIP model.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string, or an ``"Error: Unable to load image."``
        message (followed by the exception text) when the file cannot be
        opened or decoded.
    """
    try:
        # Image.open is lazy: convert() forces the pixel data to be decoded
        # inside the try, so a corrupt file is reported here rather than
        # failing later in the processor. It also normalises RGBA, palette
        # and grayscale inputs to RGB. The with-block closes the file handle.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
    except Exception as e:
        # Best-effort contract: report the failure as a string, never raise.
        return f"Error: Unable to load image. {str(e)}"

    inputs = processor(images=image, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        out = caption_model.generate(**inputs)

    return processor.decode(out[0], skip_special_tokens=True)

View Full Code

Downloading reels, extracting the audio, and converting it to text:

View Full Code

Important code snippets:

Contact